GitHub Access Token became invalid

It seems like the GitHub access token used for retrieving details about this repository from GitHub became invalid. This might prevent certain types of inspections from being run (in particular, everything related to pull requests).
Please ask an admin of your repository to renew the access token on this website.
Passed
Push — master ( e4b26c...e987f0 )
by Sabiha
02:06
created

core.data_frame()   A

Complexity

Conditions 1

Size

Total Lines 16
Code Lines 5

Duplication

Lines 16
Ratio 100 %

Importance

Changes 0
Metric Value
cc 1
eloc 5
nop 2
dl 16
loc 16
rs 10
c 0
b 0
f 0
1
# This is a tool to automate cyclic voltammetry analysis.
2
# Current Version = 1
3
4
import pandas as pd
5
import numpy as np
6
import csv
7
import matplotlib.pyplot as plt
8
import warnings
9
import matplotlib.cbook
10
import peakutils
11
import copy
12
from matplotlib import rcParams
13
14
15 View Code Duplication
def read_cycle(data):
    """Build a DataFrame of potential and current from one cycle's lines.

    Parameters
    ----------
    data : sequence of str
        Lines of the data file that belong to one cycle. The first three
        entries are skipped (presumably the 'CURVE' line plus two header
        rows -- confirm against the file format). Each remaining line is
        split on tabs; field 3 is read as the potential and field 4 as
        the current.

    Returns
    -------
    pandas.DataFrame
        Columns 'Potential' and 'Current', one row per data line.

    Raises
    ------
    IndexError
        If a non-blank data line has fewer than five tab-separated fields.
    ValueError
        If field 3 or field 4 cannot be converted to float.
    """
    potential = []
    current = []
    for line in data[3:]:
        if not line.strip():
            # A blank line (e.g. at the end of the file) carries no sample.
            continue
        fields = line.split("\t")  # split once, not once per column
        current.append(float(fields[4]))
        potential.append(float(fields[3]))
    return pd.DataFrame(list(zip(potential, current)),
                        columns=['Potential', 'Current'])
36
37
38 View Code Duplication
def read_file(file):
    """Read a raw data file and build one DataFrame per stored cycle.

    Lines before the first line starting with 'CURVE' are ignored. Every
    line starting with 'CURVE' opens a new segment; when the next 'CURVE'
    line is reached, the segment collected so far is passed to
    ``read_cycle`` and stored under the key 'cycle_<k>'.

    Parameters
    ----------
    file : str
        Path of the raw data file.

    Returns
    -------
    dict_of_df : dict
        Maps 'cycle_1', 'cycle_2', ... to the DataFrame of that cycle.
    number : int
        Number of cycles stored in ``dict_of_df``; 0 when the file
        contains fewer than two 'CURVE' lines.
    """
    dict_of_df = {}
    n_cycle = 0
    # Start at 0 so a file with fewer than two 'CURVE' lines returns an
    # empty result instead of failing on an unbound name.
    number = 0
    segment = []
    with open(file, 'rt') as f:
        print(file + ' Opened')
        for line in f:
            if line.startswith('CURVE'):
                n_cycle += 1
                if n_cycle > 1:
                    # The previous segment is complete: convert and store it.
                    number = n_cycle - 1
                    dict_of_df['cycle_' + str(number)] = read_cycle(segment)
                segment = []
            if n_cycle:
                segment.append(line)
    # NOTE(review): the segment after the last 'CURVE' line is collected but
    # never converted, so the final curve of the file is not returned.
    # Confirm whether that is intentional before changing it.
    return dict_of_df, number
0 ignored issues
show
introduced by
The variable number does not seem to be defined for all execution paths.
Loading history...
84
85
86
#df = pd.DataFrame(list(dict1['df_1'].items()))
87
#list1, list2 = list(dict1['df_1'].items())
88
#list1, list2 = list(dict1.get('df_'+str(1)))
89
90 View Code Duplication
def data_frame(dict_cycle, n):
    """Return a fresh DataFrame for cycle ``n`` of a cycle dictionary.

    Parameters
    ----------
    dict_cycle : dict
        Dictionary of DataFrames keyed by 'cycle_<number>'.
    n : int
        Cycle number to fetch.

    Returns
    -------
    pandas.DataFrame
        New DataFrame with columns 'Potential' and 'Current', taken from
        the first and second column of the stored frame, with a fresh
        default index.

    Raises
    ------
    KeyError
        If ``dict_cycle`` has no entry for cycle ``n``.
    """
    key = 'cycle_' + str(n)
    if key not in dict_cycle:
        raise KeyError("no data stored for {}".format(key))
    cycle = dict_cycle[key]
    # Columns are taken by position, so the stored column names are not
    # relied upon.
    return pd.DataFrame({'Potential': cycle.iloc[:, 0].values,
                         'Current': cycle.iloc[:, 1].values},
                        columns=['Potential', 'Current'])
106
107
108 View Code Duplication
def plot_fig(dict_cycle, n):
    """Plot current against potential for cycles 1..n on one figure.

    Parameters
    ----------
    dict_cycle : dict
        Dictionary of DataFrames for all the cycles.
    n : int
        Number of cycles to plot.

    The cycle number is printed as each cycle is plotted, and the figure
    is saved to a file called cycle.png.
    """
    for cycle_no in range(1, n + 1):
        print(cycle_no)
        frame = data_frame(dict_cycle, cycle_no)
        plt.plot(frame.Potential, frame.Current,
                 label="Cycle{}".format(cycle_no))

    plt.xlabel('Voltage')
    plt.ylabel('Current')
    plt.legend()
    plt.savefig('cycle.png')
    print('executed')
130
131
132
# Split forward- and backward-sweeping data to make processing easier.
133
def split(vector):
    """Split a pandas Series into its first and second half.

    Parameters
    ----------
    vector : pandas.Series
        Typically a DataFrame column such as df['Potential'].

    Returns
    -------
    tuple of numpy.ndarray
        ``(first_half, second_half)``. For an odd length the second half
        holds the extra element.

    Raises
    ------
    AssertionError
        If ``vector`` is not a pandas Series.
    """
    if not isinstance(vector, pd.Series):
        # AssertionError is kept for callers that already expect it; it is
        # raised explicitly so the check survives ``python -O``.
        raise AssertionError("Input of the function should be pandas series")
    values = np.array(vector)  # convert once and slice the same array
    midpoint = len(values) // 2
    return values[:midpoint], values[midpoint:]
154
155
156
def critical_idx(x, y):
    """Return the index used as the critical point for baseline fitting.

    The slope between consecutive points is computed, two smoothed slope
    curves are built from it, and the positions where the two rounded
    curves switch between being equal and being different are collected.
    The entry at position 5 of that collection is returned.

    Parameters
    ----------
    x : numpy.ndarray
    y : numpy.ndarray
        Typically the outputs of ``split`` for the potential and current
        columns. Both must have the same first dimension.

    Returns
    -------
    numpy integer
        The entry at position 5 of the array of crossing indices.

    Raises
    ------
    AssertionError
        If ``x`` or ``y`` is not a numpy array.
    ValueError
        If ``x`` and ``y`` differ in their first dimension.
    IndexError
        If fewer than six crossings are found.
    """
    if not isinstance(x, np.ndarray) or not isinstance(y, np.ndarray):
        # AssertionError is kept for callers that already expect it; it is
        # raised explicitly so the check survives ``python -O``.
        raise AssertionError("Input of the function should be numpy array")
    if x.shape[0] != y.shape[0]:
        raise ValueError("x and y must have same first dimension, but "
                         "have shapes {} and {}".format(x.shape, y.shape))
    slopes = np.diff(y) / np.diff(x)

    # NOTE(review): the short window adds 5 slopes but divides by 10, i.e.
    # it is half of a 5-point mean, and the original comments mention both
    # "5" and "10" points. Kept as is because changing it would shift every
    # detected index -- confirm the intended window size.
    short_avg = []
    for start in range(len(slopes) - 10):
        total = 0
        for value in slopes[start:start + 5]:
            total = total + value
        short_avg.append(round(total / 10, 5))

    long_avg = []
    for start in range(len(slopes) - 15):
        total = 0
        for value in slopes[start:start + 15]:
            total = total + value
        long_avg.append(round(total / 15, 5))

    short_arr = np.asarray(short_avg)
    long_arr = np.asarray(long_avg)

    # NOTE(review): ``!= 0`` is applied before np.diff, so np.diff sees a
    # boolean array and marks where the two curves switch between equal
    # and unequal, not where the sign of their difference flips. Kept as
    # is pending confirmation of the intended behaviour.
    differs = np.sign(long_arr - short_arr[:len(long_arr)]) != 0
    idx = np.argwhere(np.diff(differs)).reshape(-1)
    if idx.size <= 5:
        raise IndexError("critical_idx needs at least 6 crossings of the "
                         "moving averages, found {}".format(idx.size))
    return idx[5]
202
# This is based on method 1, where the user cannot choose the baseline.
203
# To let the user choose the baseline, use method 2 instead.
204
205
206
def sum_mean(vector):
    """Return the sum and the mean of a numpy array.

    Parameters
    ----------
    vector : numpy.ndarray
        Typically one of the arrays returned by ``split``.

    Returns
    -------
    list
        ``[total, total / len(vector)]`` -- the sum first, then the mean.

    Raises
    ------
    AssertionError
        If ``vector`` is not a numpy array.
    ZeroDivisionError
        If ``vector`` is empty.
    """
    if not isinstance(vector, np.ndarray):
        # AssertionError is kept for callers that already expect it; it is
        # raised explicitly so the check survives ``python -O``.
        raise AssertionError("Input of the function should be numpy array")
    # The built-in sum adds left to right starting from 0, which keeps the
    # result identical to an explicit accumulation loop.
    total = sum(vector)
    return [total, total / len(vector)]
221
222
223
def multiplica(vector_x, vector_y):
    """Return the sum of the element-wise products of two numpy arrays.

    Parameters
    ----------
    vector_x, vector_y : numpy.ndarray
        Typically arrays returned by ``split``. They may be the same
        array. If the lengths differ, the extra elements of the longer
        array are ignored.

    Returns
    -------
    number
        Sum of ``x * y`` over the paired elements; 0 for empty input.

    Raises
    ------
    AssertionError
        If either argument is not a numpy array.
    """
    if not isinstance(vector_x, np.ndarray) or not isinstance(vector_y, np.ndarray):
        # AssertionError is kept for callers that already expect it; it is
        # raised explicitly so the check survives ``python -O``.
        raise AssertionError("Input of the function should be numpy array")
    # The built-in sum adds left to right starting from 0, which keeps the
    # result identical to an explicit accumulation loop.
    return sum(x * y for x, y in zip(vector_x, vector_y))
242
243
def linear_coeff(x, y):
    """Return slope ``m`` and intercept ``b`` of a least-squares line.

    Parameters
    ----------
    x : numpy.ndarray
        x values, typically an output of ``split``.
    y : numpy.ndarray
        y values, typically an output of ``split``.

    Returns
    -------
    tuple
        ``(m, b)`` with
        ``m = (sum(x*y) - sum(x)*mean(y)) / (sum(x*x) - sum(x)*mean(x))``
        and ``b = mean(y) - m * mean(x)``.
    """
    # Each statistic is a full pass over the data, so compute it only once.
    sum_x, mean_x = sum_mean(x)
    mean_y = sum_mean(y)[1]
    m = ((multiplica(x, y) - sum_x * mean_y) /
         (multiplica(x, x) - sum_x * mean_x))
    b = mean_y - m * mean_x
    return m, b
259
260
261
def y_fitted_line(m, b, x):
    """Evaluate the line ``y = m * x + b`` at every value of ``x``.

    Parameters
    ----------
    m : number
        Slope of the baseline.
    b : number
        y intercept of the baseline.
    x : iterable of numbers
        x values, typically an output of ``split``.

    Returns
    -------
    list
        One fitted y value per element of ``x``, in the same order.
    """
    return [m * value + b for value in x]
280
281
282
def linear_background(x, y):
    """Fit a linear baseline to one half of a cyclic voltammetry sweep.

    The critical index from ``critical_idx`` is shifted by 5, a line is
    fitted to the points in a window centred on that index, and the line
    is evaluated at every x value.

    Parameters
    ----------
    x : numpy.ndarray
        x values of the sweep, typically an output of ``split``.
    y : numpy.ndarray
        y values of the sweep, typically an output of ``split``.

    Returns
    -------
    list
        Baseline y value for every element of ``x``.

    Raises
    ------
    AssertionError
        If ``x`` or ``y`` is not a numpy array.
    """
    if not isinstance(x, np.ndarray) or not isinstance(y, np.ndarray):
        # AssertionError is kept for callers that already expect it; it is
        # raised explicitly so the check survives ``python -O``.
        raise AssertionError("Input of the function should be numpy array")
    idx = critical_idx(x, y) + 5  # arbitrary offset that can be tuned
    half_width = int(0.5 * idx)
    # Same window for x and y: half of idx on either side of idx.
    window = slice(idx - half_width, idx + half_width)
    m, b = linear_coeff(x[window], y[window])
    return y_fitted_line(m, b, x)
302
303 View Code Duplication
def peak_detection_fxn(data_y):
    """Locate one peak in each half of a current column.

    The column is split by position into a first and a second half with
    ``split``. The first and last 10% of each half are dropped, to avoid
    picking up peaks from electrolysis at the ends of the sweep.
    ``peakutils.indexes`` is then run on the trimmed second half and on
    the absolute value of the trimmed first half. From each result the
    middle detected peak is taken and shifted back by the trimmed amount.

    Parameters
    ----------
    data_y : pandas.Series
        Column of y (current) values from a DataFrame.

    Returns
    -------
    list
        ``[top, bottom]``: peak index within the second half, then peak
        index within the first half.

    Raises
    ------
    IndexError
        If no peak is detected in either half.
    """
    col_y1, col_y2 = split(data_y)

    # Both halves are trimmed using the length of the first half.
    # NOTE(review): for an odd number of samples the second half is one
    # element longer, so its trimmed window is not exactly centred.
    len_y = len(col_y1)
    ten_percent = int(np.around(0.1 * len_y))
    mod_col_y1 = col_y1[ten_percent:len_y - ten_percent]
    mod_col_y2 = col_y2[ten_percent:len_y - ten_percent]

    peak_top = peakutils.indexes(mod_col_y2, thres=0.99, min_dist=20)
    peak_bottom = peakutils.indexes(abs(mod_col_y1), thres=0.99, min_dist=20)
    if len(peak_top) == 0 or len(peak_bottom) == 0:
        raise IndexError("no peak detected in the {} half of the data".format(
            "second" if len(peak_top) == 0 else "first"))

    # Add ten_percent back so the indices refer to the untrimmed halves.
    return [peak_top[len(peak_top) // 2] + ten_percent,
            peak_bottom[len(peak_bottom) // 2] + ten_percent]
358
359
360 View Code Duplication
def peak_values(DataFrame_x, DataFrame_y):
    """Return the potential and current at the two detected peaks.

    Parameters
    ----------
    DataFrame_x : pandas.Series
        Column of x (potential) values, e.g. df['Potential'].
    DataFrame_y : pandas.Series
        Column of y (current) values, e.g. df['Current'].

    Returns
    -------
    numpy.ndarray
        Four values in this order: potential of the top peak, current of
        the top peak, potential of the bottom peak, current of the bottom
        peak.
    """
    top_idx, bottom_idx = peak_detection_fxn(DataFrame_y)
    first_x, second_x = split(DataFrame_x)
    first_y, second_y = split(DataFrame_y)
    # The top peak is looked up in the second half of the data and the
    # bottom peak in the first half.
    return np.array([
        second_x[top_idx],
        second_y[top_idx],
        first_x[bottom_idx],
        first_y[bottom_idx],
    ])
391
392
393
def del_potential(DataFrame_x, DataFrame_y):
    """Return the difference between the two peak potentials.

    Parameters
    ----------
    DataFrame_x : pandas.Series
        Column of x (potential) values, e.g. df['Potential'].
    DataFrame_y : pandas.Series
        Column of y (current) values, e.g. df['Current'].

    Returns
    -------
    numpy scalar
        Potential of the top peak minus potential of the bottom peak.
    """
    # Run the peak detection once and reuse the result for both terms.
    peaks = peak_values(DataFrame_x, DataFrame_y)
    return peaks[0] - peaks[2]
413
414
415
def half_wave_potential(DataFrame_x, DataFrame_y):
    """Return the half wave potential (redox potential) of a CV cycle.

    The half wave potential is the midpoint of the two peak potentials,
    ``(E_top + E_bottom) / 2``. It is not half of the peak separation;
    use ``del_potential`` for the separation itself.

    Parameters
    ----------
    DataFrame_x : pandas.Series
        Column of x (potential) values, e.g. df['Potential'].
    DataFrame_y : pandas.Series
        Column of y (current) values, e.g. df['Current'].

    Returns
    -------
    numpy scalar
        The half wave potential as a floating point number.
    """
    peaks = peak_values(DataFrame_x, DataFrame_y)
    # peaks[0] is the potential of the top peak, peaks[2] of the bottom one.
    return (peaks[0] + peaks[2]) / 2
435
436
437 View Code Duplication
def peak_heights(DataFrame_x, DataFrame_y):
    """Return the heights of the two peaks above their linear baselines.

    Parameters
    ----------
    DataFrame_x : pandas.Series
        Column of x (potential) values, e.g. df['Potential'].
    DataFrame_y : pandas.Series
        Column of y (current) values, e.g. df['Current'].

    Returns
    -------
    list
        ``[height_of_max, height_of_min]``. The first is the current at
        the top peak minus the baseline there (signed); the second is the
        absolute distance between the current at the bottom peak and the
        baseline there.
    """
    # Peak detection and the peak coordinates are each computed once.
    peaks = peak_values(DataFrame_x, DataFrame_y)
    top_idx, bottom_idx = peak_detection_fxn(DataFrame_y)
    current_max = peaks[1]
    current_min = peaks[3]

    x1, x2 = split(DataFrame_x)
    y1, y2 = split(DataFrame_y)
    # The bottom peak lies in the first half, the top peak in the second.
    line_at_min = linear_background(x1, y1)[bottom_idx]
    line_at_max = linear_background(x2, y2)[top_idx]

    height_of_max = current_max - line_at_max
    height_of_min = abs(current_min - line_at_min)
    return [height_of_max, height_of_min]
464
465
466
def peak_ratio(DataFrame_x, DataFrame_y):
    """Return the ratio of the two peak heights of a CV cycle.

    Parameters
    ----------
    DataFrame_x : pandas.Series
        Column of x (potential) values, e.g. df['Potential'].
    DataFrame_y : pandas.Series
        Column of y (current) values, e.g. df['Current'].

    Returns
    -------
    float
        Height of the maximum peak divided by height of the minimum peak.
    """
    # Compute the heights once; each call repeats the full peak analysis.
    height_of_max, height_of_min = peak_heights(DataFrame_x, DataFrame_y)
    return height_of_max / height_of_min
485