core.data_frame() - Code Metrics - Inspection of "Updated core with new baseline function" - sabiharustam/voltcycle - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Push — master ( e4b26c...e987f0 )

by Sabiha

created 2019-03-21 21:01 UTC

core.data_frame() A

↳ Parent: core

Complexity

Conditions

Size

Total Lines	16
Code Lines	5

Duplication

Lines	16
Ratio	100 %

Importance

Changes

Metric	Value
cc	1
eloc	5
nop	2
dl	16
loc	16
rs	10
c	0
b	0
f	0

# This is a tool to automate cyclic voltammetry analysis.
# Current Version = 1

import pandas as pd
import numpy as np
import csv
import matplotlib.pyplot as plt
import warnings
import matplotlib.cbook
import peakutils
import copy
from matplotlib import rcParams


def read_cycle(data):
    """Convert the text lines of one cycle into a two-column dataframe.

    The first three entries of ``data`` are skipped. Every remaining entry
    is split on tab characters; field 3 is taken as the potential and
    field 4 as the current, both converted to ``float``.

    Parameters
    ----------
    data : sequence of str
        Lines belonging to a single cycle of the data file.

    Returns
    -------
    pandas.DataFrame
        One row per parsed line, with columns 'Potential' and 'Current'.
    """
    rows = []
    for raw_line in data[3:]:
        fields = raw_line.split("\t")
        # Current is converted before potential, as in the original flow.
        current_value = float(fields[4])
        potential_value = float(fields[3])
        rows.append((potential_value, current_value))
    return pd.DataFrame(rows, columns=['Potential', 'Current'])


def read_file(file):
    """Read a raw data file and build one dataframe per completed cycle.

    Lines are scanned in order. Every line starting with 'CURVE' opens a
    new segment; when a new segment opens, the lines collected for the
    previous one are passed to ``read_cycle`` and the resulting dataframe
    is stored under the key 'cycle_<k>' (k = 1, 2, ...). Lines that appear
    before the first 'CURVE' marker are ignored.

    NOTE(review): the segment that follows the *last* 'CURVE' marker is
    never converted, so a file with k markers yields k - 1 cycles. This
    matches the previous behaviour; confirm that it is intended.

    The SCANRATE / STEPSIZE header values were previously parsed into
    locals that were never used; they are no longer parsed.

    Parameters
    ----------
    file : str
        Path of the raw data file.

    Returns
    -------
    dict_of_df : dict
        Maps 'cycle_<k>' to the dataframe of cycle k.
    number : int
        Number of cycles stored in ``dict_of_df``. It is 0 when the file
        contains fewer than two 'CURVE' markers (this case used to raise
        UnboundLocalError).
    """
    dict_of_df = {}
    number = 0      # cycles stored so far; stays 0 if nothing is stored
    n_cycle = 0     # 'CURVE' markers seen so far
    segment = []    # lines of the segment currently being collected
    with open(file, 'rt') as f:
        print(file + ' Opened')
        for line in f:
            if line.startswith('CURVE'):
                n_cycle += 1
                if n_cycle > 1:
                    # A new marker closes the previous segment.
                    number = n_cycle - 1
                    dict_of_df['cycle_' + str(number)] = read_cycle(segment)
                segment = []
            if n_cycle:
                # The marker line itself is kept; read_cycle skips the
                # first three lines of each segment.
                segment.append(line)
    return dict_of_df, number



#df = pd.DataFrame(list(dict1['df_1'].items()))
#list1, list2 = list(dict1['df_1'].items())
#list1, list2 = list(dict1.get('df_'+str(1)))

def data_frame(dict_cycle, n):
    """Return the data stored for cycle ``n`` as a fresh dataframe.

    The entry 'cycle_<n>' is looked up in ``dict_cycle``; it must expose
    exactly two (name, values) pairs through ``.items()``. The two value
    sequences are paired element-wise and rebuilt into a new dataframe.

    Parameters
    ----------
    dict_cycle : dict
        Dictionary of dataframes keyed by 'cycle_<k>'.
    n : int
        Cycle number.

    Returns
    -------
    pandas.DataFrame
        Dataframe with columns 'Potential' and 'Current' for cycle ``n``.
    """
    cycle = dict_cycle.get('cycle_' + str(n))
    # Exactly two columns are expected; the column names are discarded.
    (_, first_values), (_, second_values) = list(cycle.items())
    pairs = list(zip(first_values, second_values))
    return pd.DataFrame(pairs, columns=['Potential', 'Current'])


def plot_fig(dict_cycle, n):
    """Plot current against potential for cycles 1..n on one figure.

    Each cycle number is printed as it is processed. The figure is saved
    to 'cycle.png' and 'executed' is printed once it has been written.

    Parameters
    ----------
    dict_cycle : dict
        Dictionary of dataframes for all the cycles.
    n : int
        Number of cycles to plot.
    """
    for cycle_number in range(1, n + 1):
        print(cycle_number)
        cycle_df = data_frame(dict_cycle, cycle_number)
        plt.plot(cycle_df.Potential, cycle_df.Current,
                 label="Cycle{}".format(cycle_number))

    plt.xlabel('Voltage')
    plt.ylabel('Current')
    plt.legend()
    plt.savefig('cycle.png')
    print('executed')


# Split forward and backward sweeping data, to make it easier for processing.
def split(vector):
    """
    Cut a pandas Series into a first half and a second half.
    ----------
    Parameters
    ----------
    vector : pandas Series, for example a dataframe column such as
    df['Potential']. Any other type fails the assertion.
    -------
    Returns
    -------
    Two numpy arrays: the first len(vector) // 2 values and the remaining
    values. For an odd length the second array is one element longer.
    """
    assert type(vector) == pd.Series, "Input of the function should be pandas series"
    values = np.array(vector)
    midpoint = len(values) // 2
    return values[:midpoint], values[midpoint:]


def critical_idx(x, y): ## Finds index where data set is no longer linear 
    """
    Pick an index of the data from two smoothed versions of its slope.

    The slope between consecutive points is computed, two running
    aggregates of that slope are built (a 5-point window and a 15-point
    window, each rounded to 5 decimals), and the positions where the
    comparison of the two aggregates changes are collected. The element
    at position 5 (zero-based) of those positions is returned.
    ----------
    Parameters
    ----------
    x : Numpy array.
    y : Numpy array.
    Both must be exactly of type numpy.ndarray and have the same first
    dimension. Normally these are the outputs of the split function.
    -------
    Returns
    -------
    The element at zero-based position 5 of the detected indices.
    ------
    Raises
    ------
    AssertionError if x or y is not a numpy.ndarray.
    ValueError if x and y differ in their first dimension.
    IndexError if fewer than six indices are detected (this includes
    inputs too short to produce any 15-point window).
    """
    assert type(x) == np.ndarray, "Input of the function should be numpy array"
    assert type(y) == np.ndarray, "Input of the function should be numpy array"
    if x.shape[0] != y.shape[0]:
        raise ValueError("x and y must have same first dimension, but "
                        "have shapes {} and {}".format(x.shape, y.shape))
    k = np.diff(y)/(np.diff(x)) # slope between each pair of consecutive points
    ## Build the two running aggregates of the slope.
    ## The window sizes are arbitrary and can be tuned to get a better fit.
    ave10 = []
    ave15 = []
    for i in range(len(k)-10):
    # Stopping 10 short of the end keeps i+j inside k.
        a = 0 
        for j in range(0,5):
            a = a + k[i+j]
        # NOTE(review): five slopes are summed but the sum is divided by 10,
        # so this is half of a 5-point mean, not a 10-point mean as the
        # name suggests. Confirm which was intended before changing it;
        # the returned index depends on it.
        ave10.append(round(a/10, 5)) 
    # Values are rounded to 5 decimal places.
    # The rounding affects how sensitive the comparison below is to noise.
    for i in range(len(k)-15): 
        b = 0 
        for j in range(0,15):
            b = b + k[i+j]
        ave15.append(round(b/15, 5))
    ave10i = np.asarray(ave10)
    ave15i = np.asarray(ave15)
    ## Compare the two curves over the length of the shorter one (ave15i).
    # NOTE(review): as parenthesised, np.diff receives the boolean array
    # `sign(...) != 0`, so the indices mark where the two rounded curves
    # switch between being equal and being different, not where their
    # difference changes sign. Confirm whether `np.diff(np.sign(...)) != 0`
    # was intended.
    # reshape(-1) flattens the argwhere result to one dimension.
    idx = np.argwhere(np.diff(np.sign(ave15i - ave10i[:len(ave15i)])!= 0)).reshape(-1)+0
    return idx[5]
# This is based on method 1, where the user cannot choose the baseline.
# To let the user choose the baseline, use method 2 instead.


def sum_mean(vector):
    """
    Compute the sum and the arithmetic mean of a numpy array.
    ----------
    Parameters
    ----------
    vector : numpy array (any other type fails the assertion).
    -------
    Returns
    -------
    A two-element list: [sum of the elements, sum divided by len(vector)].
    """
    assert type(vector) == np.ndarray, "Input of the function should be numpy array"
    # Built-in sum starts from 0 and adds the elements from left to right.
    total = sum(vector)
    return [total, total / len(vector)]


def multiplica(vector_x, vector_y):
    """
    Compute the sum of the element-wise products of two numpy arrays.
    ----------
    Parameters
    ----------
    vector_x, vector_y : numpy arrays (any other type fails the assertion).
    The same array may be passed twice. If the lengths differ, only the
    leading elements up to the shorter length are paired.
    -------
    Returns
    -------
    The accumulated sum of x * y over the paired elements (0 if there are
    no pairs).
    """
    assert type(vector_x) == np.ndarray, "Input of the function should be numpy array"
    assert type(vector_y) == np.ndarray, "Input of the function should be numpy array"
    return sum(x_val * y_val for x_val, y_val in zip(vector_x, vector_y))

def linear_coeff(x, y):
    """
    Compute the slope m and intercept b of a straight line through (x, y).

    m = (sum(x*y) - sum(x) * mean(y)) / (sum(x*x) - sum(x) * mean(x))
    b = mean(y) - m * mean(x)
    ----------
    Parameters
    ----------
    x : numpy array, e.g. an output of the split function.
    y : numpy array, e.g. an output of the split function.
    -------
    Returns
    -------
    The pair (m, b).
    """
    sum_xy = multiplica(x, y)
    sum_xx = multiplica(x, x)
    sum_x, mean_x = sum_mean(x)
    mean_y = sum_mean(y)[1]
    m = (sum_xy - sum_x * mean_y) / (sum_xx - sum_x * mean_x)
    b = mean_y - m * mean_x
    return m, b


def y_fitted_line(m, b, x):
    """
    Evaluate the straight line y = m * x + b at every value in x.
    ----------
    Parameters
    ----------
    m : slope of the line.
    b : y intercept of the line.
    x : iterable of x values.
    -------
    Returns
    -------
    List with one y value per element of x, in the same order.
    """
    return [m * x_val + b for x_val in x]


def linear_background(x, y):
    """
    Fit a straight baseline to part of the data and evaluate it over x.

    The index returned by critical_idx, shifted by 5, is used as the
    centre of a window extending half of that index to each side. A line
    is fitted to the points inside the window with linear_coeff and then
    evaluated at every x value with y_fitted_line.
    ----------
    Parameters
    ----------
    x : numpy array of x values, e.g. an output of the split function.
    y : numpy array of y values, e.g. an output of the split function.
    -------
    Returns
    -------
    List of baseline y values, one per element of x.
    """
    assert type(x) == np.ndarray, "Input of the function should be numpy array"
    assert type(y) == np.ndarray, "Input of the function should be numpy array"
    # The +5 offset is an arbitrary tuning value.
    centre = critical_idx(x, y) + 5
    half_width = int(0.5 * centre)
    window = slice(centre - half_width, centre + half_width)
    slope, intercept = linear_coeff(x[window], y[window])
    return y_fitted_line(slope, intercept, x)

def peak_detection_fxn(data_y):
    """Locate one peak index in each half of the y data.

    The column is cut into a first and a second half with ``split``. A
    margin of 10% of the first half's length is removed from both ends of
    each half before peak detection. ``peakutils.indexes`` is run on the
    trimmed second half and on the absolute value of the trimmed first
    half. From each result the middle detected peak is kept, and the
    trimmed margin is added back so the index refers to the untrimmed half.

    Parameters
    ----------
    data_y : pandas Series
        Column holding the y values.

    Returns
    -------
    list
        [index of the peak in the second half (top curve),
         index of the peak in the first half (bottom curve)].
    """
    first_half, second_half = split(data_y)

    # The margin is derived from the first half and applied to both halves.
    half_len = len(first_half)
    margin = int(np.around(0.1 * half_len))
    middle = slice(margin, half_len - margin)

    trimmed_second = second_half[middle]
    trimmed_first = first_half[middle]

    peak_top = peakutils.indexes(trimmed_second, thres=0.99, min_dist=20)
    peak_bottom = peakutils.indexes(abs(trimmed_first), thres=0.99, min_dist=20)

    # Keep the middle detected peak of each half, shifted back by the margin.
    return [
        peak_top[int(len(peak_top) / 2)] + margin,
        peak_bottom[int(len(peak_bottom) / 2)] + margin,
    ]


def peak_values(DataFrame_x, DataFrame_y):
    """Return the x and y values at the indices found by peak_detection_fxn.

       Parameters
       ----------
       DataFrame_x : pandas Series
         Column of x data (potentials).

       DataFrame_y : pandas Series
         Column of y data (currents).

       Returns
       -------
       numpy array holding, in order: x of the top-curve peak, y of the
       top-curve peak, x of the bottom-curve peak, y of the bottom-curve
       peak. The top-curve values are read from the second half of each
       column and the bottom-curve values from the first half."""
    top_idx, bottom_idx = peak_detection_fxn(DataFrame_y)
    x_first, x_second = split(DataFrame_x)
    y_first, y_second = split(DataFrame_y)
    return np.array([
        x_second[top_idx],     # TOPX
        y_second[top_idx],     # TOPY
        x_first[bottom_idx],   # BOTTOMX
        y_first[bottom_idx],   # BOTTOMY
    ])


def del_potential(DataFrame_x, DataFrame_y):
    """Return the difference between the two peak potentials.

       The result is the x value of the top-curve peak minus the x value
       of the bottom-curve peak, both taken from peak_values. peak_values
       is evaluated once; it used to be evaluated twice with identical
       arguments.

       Parameters
       ----------
       DataFrame_x : pandas Series
         Column of x data (potentials).

       DataFrame_y : pandas Series
         Column of y data (currents).

       Returns
       -------
       The difference in peak potentials, as a single element-wise
       difference of two entries of the array returned by peak_values."""
    peaks = peak_values(DataFrame_x, DataFrame_y)
    return peaks[0] - peaks[2]


def half_wave_potential(DataFrame_x, DataFrame_y):
    """Return the half wave potential (redox potential) of the data.

       The half wave potential is the midpoint of the two peak
       potentials, (E_top + E_bottom) / 2. This function previously
       returned half of the peak separation, (E_top - E_bottom) / 2,
       which is not a potential on the measured axis.

       Parameters
       ----------
       DataFrame_x : pandas Series
         Column of x data (potentials).

       DataFrame_y : pandas Series
         Column of y data (currents).

       Returns
       -------
       The half wave potential as a floating point number."""
    peaks = peak_values(DataFrame_x, DataFrame_y)
    # peaks[0] is the top-curve peak potential, peaks[2] the bottom-curve one.
    return (peaks[0] + peaks[2]) / 2


def peak_heights(DataFrame_x, DataFrame_y):
    """Return the heights of the two peaks above their linear baselines.

       A linear baseline is fitted separately to each half of the data
       with linear_background. Each height is the distance between the
       peak current and the baseline value at the peak index. peak_values
       and peak_detection_fxn are each evaluated once; they used to be
       evaluated twice with identical arguments.

       Parameters
       ----------
       DataFrame_x : pandas Series
         Column of x data (potentials).

       DataFrame_y : pandas Series
         Column of y data (currents).

       Returns
       -------
       list
         [height of the top-curve peak, height of the bottom-curve peak].
         The bottom-curve height is returned as an absolute value."""
    _, current_max, _, current_min = peak_values(DataFrame_x, DataFrame_y)
    top_idx, bottom_idx = peak_detection_fxn(DataFrame_y)
    x1, x2 = split(DataFrame_x)
    y1, y2 = split(DataFrame_y)
    # First half holds the bottom curve, second half the top curve.
    line_at_min = linear_background(x1, y1)[bottom_idx]
    line_at_max = linear_background(x2, y2)[top_idx]
    height_of_max = current_max - line_at_max
    height_of_min = abs(current_min - line_at_min)
    return [height_of_max, height_of_min]


def peak_ratio(DataFrame_x, DataFrame_y):
    """Return the ratio of the two peak heights.

       The ratio is the top-curve peak height divided by the bottom-curve
       peak height, both taken from peak_heights. peak_heights is
       evaluated once; it used to be evaluated twice with identical
       arguments.

       Parameters
       ----------
       DataFrame_x : pandas Series
         Column of x data (potentials).

       DataFrame_y : pandas Series
         Column of y data (currents).

       Returns
       -------
       The peak ratio as a floating point number."""
    height_of_max, height_of_min = peak_heights(DataFrame_x, DataFrame_y)
    return height_of_max / height_of_min


1		# This is a tool to automate cyclic voltametry analysis.
2		# Current Version = 1
3
4		import pandas as pd
5		import numpy as np
6		import csv
7		import matplotlib.pyplot as plt
8		import warnings
9		import matplotlib.cbook
10		import peakutils
11		import copy
12		from matplotlib import rcParams
13
14
15	View Code Duplication	def read_cycle(data):
		0 ignored issues – show Duplication introduced 2019-03-14 22:59 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
16		"""This function reads a segment of datafile (corresponding a cycle)
17		and generates a dataframe with columns 'Potential' and 'Current'
18
19		Parameters
20		__________
21		data: segment of data file
22
23		Returns
24		_______
25		A dataframe with potential and current columns
26		"""
27
28		current = []
29		potential = []
30		for i in data[3:]:
31		current.append(float(i.split("\t")[4]))
32		potential.append(float(i.split("\t")[3]))
33		zippedList = list(zip(potential, current))
34		df = pd.DataFrame(zippedList, columns = ['Potential' , 'Current'])
35		return df
36
37
38	View Code Duplication	def read_file(file):
		0 ignored issues – show Duplication introduced 2019-03-14 22:59 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
39		"""This function reads the raw data file, gets the scanrate and stepsize
40		and then reads the lines according to cycle number. Once it reads the data
41		for one cycle, it calls read_cycle function to generate a dataframe. It
42		does the same thing for all the cycles and finally returns a dictionary,
43		the keys of which are the cycle numbers and the values are the
44		corresponding dataframes.
45
46		Parameters
47		__________
48		file: raw data file
49
50		Returns:
51		________
52		dict_of_df: dictionary of dataframes with keys = cycle numbers and
53		values = dataframes for each cycle
54		n_cycle: number of cycles in the raw file
55		"""
56		dict_of_df = {}
57		h = 0
58		l = 0
59		n_cycle = 0
60		#a = []
61		with open(file, 'rt') as f:
62		print(file + ' Opened')
63		for line in f:
64		record = 0
65		if not (h and l):
66		if line.startswith('SCANRATE'):
67		scan_rate = float(line.split()[2])
68		h = 1
69		if line.startswith('STEPSIZE'):
70		step_size = float(line.split()[2])
71		l = 1
72		if line.startswith('CURVE'):
73		n_cycle += 1
74		if n_cycle > 1:
75		number = n_cycle - 1
76		df = read_cycle(a)
		0 ignored issues – show introduced 2019-03-14 22:59 UTC by Report Bug Copy Issue Report The variable `a` does not seem to be defined for all execution paths. Loading history...
77		key_name = 'cycle_' + str(number)
78		#key_name = number
79		dict_of_df[key_name] = copy.deepcopy(df)
80		a = []
81		if n_cycle:
82		a.append(line)
83		return dict_of_df, number
		0 ignored issues – show introduced 2019-03-14 22:59 UTC by Report Bug Copy Issue Report The variable `number` does not seem to be defined for all execution paths. Loading history...
84
85
86		#df = pd.DataFrame(list(dict1['df_1'].items()))
87		#list1, list2 = list(dict1['df_1'].items())
88		#list1, list2 = list(dict1.get('df_'+str(1)))
89
90	View Code Duplication	def data_frame(dict_cycle, n):
		0 ignored issues – show Duplication introduced 2019-03-14 22:59 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
91		"""Reads the dictionary of dataframes and returns dataframes for each cycle
92
93		Parameters
94		__________
95		dict_cycle: Dictionary of dataframes
96		n: cycle number
97
98		Returns:
99		_______
100		Dataframe correcponding to the cycle number
101		"""
102		list1, list2 = (list(dict_cycle.get('cycle_'+str(n)).items()))
103		zippedList = list(zip(list1[1], list2[1]))
104		data = pd.DataFrame(zippedList, columns = ['Potential' , 'Current'])
105		return data
106
107
108	View Code Duplication	def plot_fig(dict_cycle, n):
		0 ignored issues – show Duplication introduced 2019-03-14 22:59 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
109		"""For basic plotting of the cycle data
110
111		Parameters
112		__________
113		dict: dictionary of dataframes for all the cycles
114		n: number of cycles
115
116		Saves the plot in a file called cycle.png
117		"""
118
119		for i in range(n):
120		print(i+1)
121		df = data_frame(dict_cycle, i+1)
122		plt.plot(df.Potential, df.Current, label = "Cycle{}".format(i+1))
123
124		#print(df.head())
125		plt.xlabel('Voltage')
126		plt.ylabel('Current')
127		plt.legend()
128		plt.savefig('cycle.png')
129		print('executed')
130
131
132		#split forward and backward sweping data, to make it easier for processing.
133		def split(vector):
134		"""
135		This function takes an array and splits it into equal two half.
136		----------
137		Parameters
138		----------
139		vector : Can be in any form of that can be turned into numpy array.
140		Normally, for the use of this function, it expects pandas DataFrame column.
141		For example, df['potentials'] could be input as the column of x data.
142		-------
143		Returns
144		-------
145		This function returns two equally splited vector.
146		The output then can be used to ease the implementation of peak detection and baseline finding.
147		"""
148		assert type(vector) == pd.core.series.Series, "Input of the function should be pandas series"
149		split = int(len(vector)/2)
150		end = int(len(vector))
151		vector1 = np.array(vector)[0:split]
152		vector2 = np.array(vector)[split:end]
153		return vector1, vector2
154
155
156		def critical_idx(x, y): ## Finds index where data set is no longer linear
157		"""
158		This function takes x and y values callculate the derrivative of x and y, and calculate moving average of 5 and 15 points.
159		Finds intercepts of different moving average curves and return the indexs of the first intercepts.
160		----------
161		Parameters
162		----------
163		x : Numpy array.
164		y : Numpy array.
165		Normally, for the use of this function, it expects numpy array that came out from split function.
166		For example, output of split.df['potentials'] could be input for this function as x.
167		-------
168		Returns
169		-------
170		This function returns 5th index of the intercepts of different moving average curves.
171		User can change this function according to baseline branch method 2 to get various indexes..
172		"""
173		assert type(x) == np.ndarray, "Input of the function should be numpy array"
174		assert type(y) == np.ndarray, "Input of the function should be numpy array"
175		if x.shape[0] != y.shape[0]:
176		raise ValueError("x and y must have same first dimension, but "
177		"have shapes {} and {}".format(x.shape, y.shape))
178		k = np.diff(y)/(np.diff(x)) #calculated slops of x and y
179		## Calculate moving average for 10 and 15 points.
180		## This two arbitrary number can be tuned to get better fitting.
181		ave10 = []
182		ave15 = []
183		for i in range(len(k)-10):
184		# The reason to minus 10 is to prevent j from running out of index.
185		a = 0
186		for j in range(0,5):
187		a = a + k[i+j]
188		ave10.append(round(a/10, 5))
189		# keeping 5 desimal points for more accuracy
190		# This numbers affect how sensitive to noise.
191		for i in range(len(k)-15):
192		b = 0
193		for j in range(0,15):
194		b = b + k[i+j]
195		ave15.append(round(b/15, 5))
196		ave10i = np.asarray(ave10)
197		ave15i = np.asarray(ave15)
198		## Find intercepts of different moving average curves
199		#reshape into one row.
200		idx = np.argwhere(np.diff(np.sign(ave15i - ave10i[:len(ave15i)])!= 0)).reshape(-1)+0
201		return idx[5]
202		# This is based on the method 1 where user can't choose the baseline.
203		# If wanted to add that, choose method2.
204
205
206		def sum_mean(vector):
207		"""
208		This function returns the mean and sum of the given vector.
209		----------
210		Parameters
211		----------
212		vector : Can be in any form of that can be turned into numpy array.
213		Normally, for the use of this function, it expects pandas DataFrame column.
214		For example, df['potentials'] could be input as the column of x data.
215		"""
216		assert type(vector) == np.ndarray, "Input of the function should be numpy array"
217		a = 0
218		for i in vector:
219		a = a + i
220		return [a,a/len(vector)]
221
222
223		def multiplica(vector_x, vector_y):
224		"""
225		This function returns the sum of the multilica of two given vector.
226		----------
227		Parameters
228		----------
229		vector_x, vector_y : Output of the split vector function.
230		Two inputs can be the same vector or different vector with same length.
231		-------
232		Returns
233		-------
234		This function returns a number that is the sum of multiplicity of given two vector.
235		"""
236		assert type(vector_x) == np.ndarray, "Input of the function should be numpy array"
237		assert type(vector_y) == np.ndarray, "Input of the function should be numpy array"
238		a = 0
239		for x,y in zip(vector_x, vector_y):
240		a = a + (x * y)
241		return a
242
243		def linear_coeff(x, y):
244		"""
245		This function returns the inclination coeffecient and y axis interception coeffecient m and b.
246		----------
247		Parameters
248		----------
249		x : Output of the split vector function.
250		y : Output of the split vector function.
251		-------
252		Returns
253		-------
254		float number of m and b.
255		"""
256		m = (multiplica(x,y) - sum_mean(x)[0] * sum_mean(y)[1]) / (multiplica(x,x) - sum_mean(x)[0] * sum_mean(x)[1])
257		b = sum_mean(y)[1] - m * sum_mean(x)[1]
258		return m, b
259
260
261		def y_fitted_line(m, b, x):
262		"""
263		This function returns the fitted baseline constructed by coeffecient m and b and x values.
264		----------
265		Parameters
266		----------
267		x : Output of the split vector function. x value of the input.
268		m : inclination of the baseline.
269		b : y intercept of the baseline.
270		-------
271		Returns
272		-------
273		List of constructed y_labels.
274		"""
275		y_base = []
276		for i in x:
277		y = m * i + b
278		y_base.append(y)
279		return y_base
280
281
282		def linear_background(x, y):
283		"""
284		This function is wrapping function for calculating linear fitted line.
285		It takes x and y values of the cv data, returns the fitted baseline.
286		----------
287		Parameters
288		----------
289		x : Output of the split vector function. x value of the cyclic voltammetry data.
290		y : Output of the split vector function. y value of the cyclic voltammetry data.
291		-------
292		Returns
293		-------
294		List of constructed y_labels.
295		"""
296		assert type(x) == np.ndarray, "Input of the function should be numpy array"
297		assert type(y) == np.ndarray, "Input of the function should be numpy array"
298		idx = critical_idx(x, y) + 5 #this is also arbitrary number we can play with.
299		m, b = linear_coeff(x[(idx - int(0.5 * idx)) : (idx + int(0.5 * idx))], y[(idx - int(0.5 * idx)) : (idx + int(0.5 * idx))])
300		y_base = y_fitted_line(m, b, x)
301		return y_base
302
303	View Code Duplication	def peak_detection_fxn(data_y):
		0 ignored issues – show Duplication introduced 2019-03-14 22:59 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
304		"""The function takes an input of the column containing the y variables in the dataframe,
305		associated with the current. The function calls the split function, which splits the
306		column into two arrays, one of the positive and one of the negative values.
307		This is because cyclic voltammetry delivers negative peaks, but the peakutils function works
308		better with positive peaks. The function also runs on the middle 80% of data to eliminate
309		unnecessary noise and messy values associated with pseudo-peaks.The vectors are then imported
310		into the peakutils.indexes function to determine the significant peak for each array.
311		The values are stored in a list, with the first index corresponding to the top peak and the
312		second corresponding to the bottom peak.
313		Parameters
314		______________
315		y column: must be a column from a pandas dataframe
316
317		Returns
318		_____________
319		A list with the index of the peaks from the top curve and bottom curve.
320		"""
321
322		# initialize storage list
323		index_list = []
324
325		# split data into above and below the baseline
326		col_y1, col_y2 = split(data_y) # removed main. head.
327
328		# detemine length of data and what 10% of the data is
329		len_y = len(col_y1)
330		ten_percent = int(np.around(0.1*len_y))
331
332		# adjust both input columns to be the middle 80% of data
333		# (take of the first and last 10% of data)
334		# this avoid detecting peaks from electrolysis
335		# (from water splitting and not the molecule itself,
336		# which can form random "peaks")
337		mod_col_y2 = col_y2[ten_percent:len_y-ten_percent]
338		mod_col_y1 = col_y1[ten_percent:len_y-ten_percent]
339
340		# run peakutils package to detect the peaks for both top and bottom
341		peak_top = peakutils.indexes(mod_col_y2, thres=0.99, min_dist=20)
342		peak_bottom = peakutils.indexes(abs(mod_col_y1), thres=0.99, min_dist=20)
343
344		# detemine length of both halves of data
345		len_top = len(peak_top)
346		len_bot = len(peak_bottom)
347
348		# append the values to the storage list
349		# manipulate values by adding the ten_percent value back
350		# (as the indecies have moved)
351		# to detect the actual peaks and not the modified values
352		index_list.append(peak_top[int(len_top/2)]+ten_percent)
353		index_list.append(peak_bottom[int(len_bot/2)]+ten_percent)
354
355		# return storage list
356		# first value is the top, second value is the bottom
357		return index_list
358
359
def peak_values(DataFrame_x, DataFrame_y):
    """Return the potential and current at the top and bottom peaks.

    The peak positions are taken from peak_detection_fxn, which is run
    on the y (current) data only.

    Parameters
    ----------
    DataFrame_x : should be in the form of a pandas DataFrame column.
        For example, df['potentials'] could be input as the column of x
        data.

    DataFrame_y : should be in the form of a pandas DataFrame column.
        For example, df['currents'] could be input as the column of y
        data.

    Returns
    -------
    Result : numpy array of coordinates at peaks in the following order:
        potential of peak on top curve, current of peak on top curve,
        potential of peak on bottom curve, current of peak on bottom curve
    """
    peak_idx = peak_detection_fxn(DataFrame_y)

    # split() hands back the two halves of a column; the first half is
    # treated as the bottom curve and the second half as the top curve.
    bottom_x, top_x = split(DataFrame_x)
    bottom_y, top_y = split(DataFrame_y)

    # peak_idx[0] locates the top peak, peak_idx[1] the bottom peak.
    top_idx = peak_idx[0]
    bottom_idx = peak_idx[1]

    return np.array([
        top_x[top_idx],        # top peak potential
        top_y[top_idx],        # top peak current
        bottom_x[bottom_idx],  # bottom peak potential
        bottom_y[bottom_idx],  # bottom peak current
    ])
391
392
def del_potential(DataFrame_x, DataFrame_y):
    """Outputs the difference in potentials between anodic and
    cathodic peaks in cyclic voltammetry data.

    Parameters
    ----------
    DataFrame_x : should be in the form of a pandas DataFrame column.
        For example, df['potentials'] could be input as the column of x
        data.

    DataFrame_y : should be in the form of a pandas DataFrame column.
        For example, df['currents'] could be input as the column of y
        data.

    Returns
    -------
    Results: difference in peak potentials (top-curve peak potential
        minus bottom-curve peak potential) as a single numeric value.
    """
    # Run peak detection once and reuse the result; peak_values returns
    # [top potential, top current, bottom potential, bottom current].
    peaks = peak_values(DataFrame_x, DataFrame_y)
    return peaks[0] - peaks[2]
413
414
def half_wave_potential(DataFrame_x, DataFrame_y):
    """Outputs the half wave potential (redox potential) from cyclic
    voltammetry data.

    The half wave potential is the midpoint between the two peak
    potentials, i.e. (E_top + E_bottom) / 2. Half of the peak
    *separation* (E_top - E_bottom) / 2 is a different quantity and is
    not what this function returns.

    Parameters
    ----------
    DataFrame_x : should be in the form of a pandas DataFrame column.
        For example, df['potentials'] could be input as the column of x
        data.

    DataFrame_y : should be in the form of a pandas DataFrame column.
        For example, df['currents'] could be input as the column of y
        data.

    Returns
    -------
    Results : the half wave potential in the form of a
        floating point number.
    """
    # peak_values returns
    # [top potential, top current, bottom potential, bottom current].
    peaks = peak_values(DataFrame_x, DataFrame_y)
    return (peaks[0] + peaks[2]) / 2
435
436
def peak_heights(DataFrame_x, DataFrame_y):
    """Outputs heights of minimum peak and maximum
    peak from cyclic voltammetry data.

    Each height is measured from the linear background (as returned by
    linear_background for the matching half of the data) to the peak
    current at the detected peak index.

    Parameters
    ----------
    DataFrame_x : should be in the form of a pandas DataFrame column.
        For example, df['potentials'] could be input as the column of x
        data.

    DataFrame_y : should be in the form of a pandas DataFrame column.
        For example, df['currents'] could be input as the column of y
        data.

    Returns
    -------
    Results: height of maximum peak, height of minimum peak
        in that order in the form of a list.
    """
    # Compute the peak coordinates and peak indices once each instead of
    # re-running the detection for every value that is needed.
    peaks = peak_values(DataFrame_x, DataFrame_y)
    current_max = peaks[1]
    current_min = peaks[3]
    peak_index = peak_detection_fxn(DataFrame_y)

    # First half of the split is the bottom curve, second half the top.
    x1, x2 = split(DataFrame_x)
    y1, y2 = split(DataFrame_y)

    # Background value underneath each peak.
    line_at_min = linear_background(x1, y1)[peak_index[1]]
    line_at_max = linear_background(x2, y2)[peak_index[0]]

    height_of_max = current_max - line_at_max
    # The bottom peak lies below its background; report a magnitude.
    height_of_min = abs(current_min - line_at_min)
    return [height_of_max, height_of_min]
464
465
def peak_ratio(DataFrame_x, DataFrame_y):
    """Outputs the peak ratios from cyclic voltammetry data.

    Parameters
    ----------
    DataFrame_x : should be in the form of a pandas DataFrame column.
        For example, df['potentials'] could be input as the column of x
        data.

    DataFrame_y : should be in the form of a pandas DataFrame column.
        For example, df['currents'] could be input as the column of y
        data.

    Returns
    -------
    Result : returns a floating point number, the peak ratio
        (height of maximum peak divided by height of minimum peak).
    """
    # peak_heights returns [height_of_max, height_of_min]; evaluate it
    # once rather than repeating the full peak analysis per operand.
    heights = peak_heights(DataFrame_x, DataFrame_y)
    return heights[0] / heights[1]
485

sabiharustam / voltcycle

GitHub Access Token became invalid

Push — master ( e4b26c...e987f0 )

core.data_frame() A

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like