Passed: Push to master (e987f0...527a82) by Sabiha, created 02:06

core (rated B)

Complexity

Total Complexity: 45

Size/Duplication

Total Lines: 529
Duplicated Lines: 49.15 %

Importance

Changes: 0

Metric   Value
wmc      45
eloc     194
dl       260
loc      529
rs       8.8
c        0
b        0
f        0

18 Functions

Rating   Name   Duplication   Size   Complexity  
A read_cycle() 21 21 2
C read_file() 46 46 10
A half_wave_potential() 0 20 1
A data_frame() 16 16 1
A del_potential() 0 20 1
A y_fitted_line() 0 19 2
A multiplica() 0 19 2
A peak_values() 31 31 1
A linear_coeff() 0 16 1
A linear_background() 0 20 1
A plot_fig() 22 22 2
A peak_detection_fxn() 55 55 1
A sum_mean() 0 15 2
A split() 0 21 1
B critical_idx() 0 46 6
A peak_heights() 27 27 1
C read_file_dash() 42 42 9
A peak_ratio() 0 19 1

How to fix

Duplicated Code

Duplicate code is one of the most pungent code smells. A common rule of thumb is to restructure code once it is duplicated in three or more places.

Common duplication problems and their corresponding solutions are:

Complexity

 Tip:   Before tackling complexity, eliminate any duplication first; this can often reduce the size of classes significantly.

Complex classes like core often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
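As a concrete illustration of removing the duplication flagged in this module: read_file and read_file_dash differ only in where their lines come from, so the shared cycle-grouping loop can be pulled out into one helper. The sketch below is a hypothetical restructuring, not the project's actual code; `_parse_cycles` is an invented name, and `_read_cycle` is a simplified stand-in for the module's read_cycle.

```python
import copy
import pandas as pd


def _read_cycle(data):
    # Simplified stand-in for the module's read_cycle: skip the 3 header
    # lines of the block, then read tab-separated potential/current columns.
    current = [float(i.split("\t")[4]) for i in data[3:]]
    potential = [float(i.split("\t")[3]) for i in data[3:]]
    return pd.DataFrame(list(zip(potential, current)),
                        columns=['Potential', 'Current'])


def _parse_cycles(lines):
    """Hypothetical shared body of read_file and read_file_dash:
    group lines into blocks starting at each 'CURVE' marker."""
    dict_of_df = {}
    n_cycle = 0
    number = 0
    a = []  # defined up front so every execution path sees it
    for line in lines:
        if line.startswith('CURVE'):
            n_cycle += 1
            if n_cycle > 1:
                number = n_cycle - 1
                dict_of_df['cycle_' + str(number)] = \
                    copy.deepcopy(_read_cycle(a))
            a = []
        if n_cycle:
            a.append(line)
    return dict_of_df, number


def read_file_dash(lines):
    # Dash hands us the lines directly.
    return _parse_cycles(lines)


def read_file(file):
    # File version: open, then reuse the exact same parsing loop.
    with open(file, 'rt') as f:
        return _parse_cycles(f)
```

With this split, the two readers shrink to one line each, and the duplication warning for this pair disappears.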

# This is a tool to automate cyclic voltammetry analysis.
# Current Version = 1

import pandas as pd
import numpy as np
import csv
import matplotlib.pyplot as plt
import warnings
import matplotlib.cbook
import peakutils
import copy
from matplotlib import rcParams

def read_cycle(data):
    """This function reads a segment of the data file (corresponding to one
    cycle) and generates a dataframe with columns 'Potential' and 'Current'.

    Parameters
    __________
    data: segment of the data file

    Returns
    _______
    A dataframe with potential and current columns
    """
    current = []
    potential = []
    for i in data[3:]:
        current.append(float(i.split("\t")[4]))
        potential.append(float(i.split("\t")[3]))
    zipped_list = list(zip(potential, current))
    df = pd.DataFrame(zipped_list, columns=['Potential', 'Current'])
    return df


def read_file_dash(lines):
    """This function is analogous to read_file, but it reads lines uploaded
    through the dash interface instead of opening a file.

    Parameters
    __________
    lines: lines from the dash input file

    Returns
    ________
    dict_of_df: dictionary of dataframes with keys = cycle numbers and
    values = dataframes for each cycle
    number: number of cycles stored in the dictionary
    """
    dict_of_df = {}
    h = 0
    l = 0
    n_cycle = 0
    number = 0
    a = []  # initialized up front so it is defined on every execution path
    for line in lines:
        if not (h and l):
            if line.startswith('SCANRATE'):
                scan_rate = float(line.split()[2])
                h = 1
            if line.startswith('STEPSIZE'):
                step_size = float(line.split()[2])
                l = 1
        if line.startswith('CURVE'):
            n_cycle += 1
            if n_cycle > 1:
                number = n_cycle - 1
                df = read_cycle(a)
                key_name = 'cycle_' + str(number)
                dict_of_df[key_name] = copy.deepcopy(df)
            a = []
        if n_cycle:
            a.append(line)
    return dict_of_df, number


def read_file(file):
    """This function reads the raw data file, gets the scan rate and step
    size, and then reads the lines according to cycle number. Once it has
    read the data for one cycle, it calls the read_cycle function to generate
    a dataframe. It does the same for all the cycles and finally returns a
    dictionary whose keys are the cycle numbers and whose values are the
    corresponding dataframes.

    Parameters
    __________
    file: raw data file

    Returns
    ________
    dict_of_df: dictionary of dataframes with keys = cycle numbers and
    values = dataframes for each cycle
    number: number of cycles stored in the dictionary
    """
    dict_of_df = {}
    h = 0
    l = 0
    n_cycle = 0
    number = 0  # initialized so it is defined even if no 'CURVE' line occurs
    a = []
    with open(file, 'rt') as f:
        print(file + ' Opened')
        for line in f:
            if not (h and l):
                if line.startswith('SCANRATE'):
                    scan_rate = float(line.split()[2])
                    h = 1
                if line.startswith('STEPSIZE'):
                    step_size = float(line.split()[2])
                    l = 1
            if line.startswith('CURVE'):
                n_cycle += 1
                if n_cycle > 1:
                    number = n_cycle - 1
                    df = read_cycle(a)
                    key_name = 'cycle_' + str(number)
                    dict_of_df[key_name] = copy.deepcopy(df)
                a = []
            if n_cycle:
                a.append(line)
    return dict_of_df, number


#df = pd.DataFrame(list(dict1['df_1'].items()))
#list1, list2 = list(dict1['df_1'].items())
#list1, list2 = list(dict1.get('df_'+str(1)))


def data_frame(dict_cycle, n):
    """Reads the dictionary of dataframes and returns the dataframe for one
    cycle.

    Parameters
    __________
    dict_cycle: dictionary of dataframes
    n: cycle number

    Returns
    _______
    Dataframe corresponding to the cycle number
    """
    list1, list2 = list(dict_cycle.get('cycle_' + str(n)).items())
    zipped_list = list(zip(list1[1], list2[1]))
    data = pd.DataFrame(zipped_list, columns=['Potential', 'Current'])
    return data


def plot_fig(dict_cycle, n):
    """Basic plotting of the cycle data.

    Parameters
    __________
    dict_cycle: dictionary of dataframes for all the cycles
    n: number of cycles

    Saves the plot in a file called cycle.png
    """
    for i in range(n):
        print(i + 1)
        df = data_frame(dict_cycle, i + 1)
        plt.plot(df.Potential, df.Current, label="Cycle{}".format(i + 1))
    plt.xlabel('Voltage')
    plt.ylabel('Current')
    plt.legend()
    plt.savefig('cycle.png')
    print('executed')


# Split forward and backward sweeping data, to make processing easier.
def split(vector):
    """
    This function takes an array and splits it into two equal halves.
    ----------
    Parameters
    ----------
    vector : Can be in any form that can be turned into a numpy array.
    Normally this function expects a pandas DataFrame column.
    For example, df['potentials'] could be input as the column of x data.
    -------
    Returns
    -------
    This function returns two equally split vectors.
    The output can then be used to ease the implementation of peak detection
    and baseline finding.
    """
    assert isinstance(vector, pd.Series), "Input of the function should be a pandas Series"
    half = int(len(vector) / 2)
    end = int(len(vector))
    vector1 = np.array(vector)[0:half]
    vector2 = np.array(vector)[half:end]
    return vector1, vector2


def critical_idx(x, y):  # Finds index where the data set is no longer linear
    """
    This function takes x and y values, calculates the derivative dy/dx, and
    computes moving averages over 10 and 15 points. It finds the intercepts
    of the two moving-average curves and returns the index of one of the
    first intercepts.
    ----------
    Parameters
    ----------
    x : Numpy array.
    y : Numpy array.
    Normally this function expects numpy arrays that came out of the split
    function. For example, the output of split(df['potentials']) could be
    input for this function as x.
    -------
    Returns
    -------
    This function returns the 5th index of the intercepts of the two
    moving-average curves. Users can change this function according to
    baseline-branch method 2 to get other indices.
    """
    assert type(x) == np.ndarray, "Input of the function should be numpy array"
    assert type(y) == np.ndarray, "Input of the function should be numpy array"
    if x.shape[0] != y.shape[0]:
        raise ValueError("x and y must have same first dimension, but "
                         "have shapes {} and {}".format(x.shape, y.shape))
    k = np.diff(y) / np.diff(x)  # slopes of y against x
    # Calculate moving averages over 10 and 15 points.
    # These two arbitrary numbers can be tuned for a better fit.
    ave10 = []
    ave15 = []
    for i in range(len(k) - 10):
        # The subtraction keeps j from running out of the index range.
        a = 0
        for j in range(0, 10):
            a = a + k[i + j]
        ave10.append(round(a / 10, 5))
    # Keeping 5 decimal points for more accuracy.
    # These numbers affect how sensitive the result is to noise.
    for i in range(len(k) - 15):
        b = 0
        for j in range(0, 15):
            b = b + k[i + j]
        ave15.append(round(b / 15, 5))
    ave10i = np.asarray(ave10)
    ave15i = np.asarray(ave15)
    # Find the intercepts (sign changes of the difference) of the two
    # moving-average curves and reshape into one row.
    idx = np.argwhere(np.diff(np.sign(ave15i - ave10i[:len(ave15i)])) != 0).reshape(-1)
    return idx[5]
# This is based on method 1, where the user cannot choose the baseline.
# If you want to add that, choose method 2.


def sum_mean(vector):
    """
    This function returns the sum and mean of the given vector.
    ----------
    Parameters
    ----------
    vector : Can be in any form that can be turned into a numpy array.
    Normally this function expects a pandas DataFrame column.
    For example, df['potentials'] could be input as the column of x data.
    """
    assert type(vector) == np.ndarray, "Input of the function should be numpy array"
    a = 0
    for i in vector:
        a = a + i
    return [a, a / len(vector)]


def multiplica(vector_x, vector_y):
    """
    This function returns the sum of the element-wise products of the two
    given vectors.
    ----------
    Parameters
    ----------
    vector_x, vector_y : Outputs of the split function.
    The two inputs can be the same vector or different vectors of the same
    length.
    -------
    Returns
    -------
    This function returns a number that is the sum of the element-wise
    products of the given two vectors.
    """
    assert type(vector_x) == np.ndarray, "Input of the function should be numpy array"
    assert type(vector_y) == np.ndarray, "Input of the function should be numpy array"
    a = 0
    for x, y in zip(vector_x, vector_y):
        a = a + (x * y)
    return a


def linear_coeff(x, y):
    """
    This function returns the slope m and the y-axis intercept b of the
    least-squares line through x and y.
    ----------
    Parameters
    ----------
    x : Output of the split function.
    y : Output of the split function.
    -------
    Returns
    -------
    Float numbers m and b.
    """
    m = (multiplica(x, y) - sum_mean(x)[0] * sum_mean(y)[1]) / \
        (multiplica(x, x) - sum_mean(x)[0] * sum_mean(x)[1])
    b = sum_mean(y)[1] - m * sum_mean(x)[1]
    return m, b


def y_fitted_line(m, b, x):
    """
    This function returns the fitted baseline constructed from the
    coefficients m and b and the x values.
    ----------
    Parameters
    ----------
    x : Output of the split function; x values of the input.
    m : slope of the baseline.
    b : y intercept of the baseline.
    -------
    Returns
    -------
    List of constructed y values.
    """
    y_base = []
    for i in x:
        y = m * i + b
        y_base.append(y)
    return y_base


def linear_background(x, y):
    """
    This is a wrapping function for calculating the linear fitted line.
    It takes the x and y values of the cv data and returns the fitted
    baseline.
    ----------
    Parameters
    ----------
    x : Output of the split function; x values of the cyclic voltammetry data.
    y : Output of the split function; y values of the cyclic voltammetry data.
    -------
    Returns
    -------
    List of constructed y values.
    """
    assert type(x) == np.ndarray, "Input of the function should be numpy array"
    assert type(y) == np.ndarray, "Input of the function should be numpy array"
    idx = critical_idx(x, y) + 5  # this offset is arbitrary and can be tuned
    m, b = linear_coeff(x[(idx - int(0.5 * idx)):(idx + int(0.5 * idx))],
                        y[(idx - int(0.5 * idx)):(idx + int(0.5 * idx))])
    y_base = y_fitted_line(m, b, x)
    return y_base


def peak_detection_fxn(data_y):
    """The function takes as input the dataframe column containing the y
    variables, associated with the current. It calls the split function,
    which splits the column into two arrays, one of the positive and one of
    the negative values. This is because cyclic voltammetry delivers negative
    peaks, but the peakutils functions work better with positive peaks. The
    function also runs on the middle 80% of the data to eliminate unnecessary
    noise and messy values associated with pseudo-peaks. The vectors are then
    passed to the peakutils.indexes function to determine the significant
    peak for each array. The values are stored in a list, with the first
    index corresponding to the top peak and the second corresponding to the
    bottom peak.

    Parameters
    ______________
    data_y: must be a column from a pandas dataframe

    Returns
    _____________
    A list with the indices of the peaks from the top and bottom curves.
    """
    # initialize storage list
    index_list = []

    # split data into above and below the baseline
    col_y1, col_y2 = split(data_y)

    # determine the length of the data and what 10% of the data is
    len_y = len(col_y1)
    ten_percent = int(np.around(0.1 * len_y))

    # adjust both input columns to be the middle 80% of the data
    # (take off the first and last 10% of the data);
    # this avoids detecting peaks from electrolysis
    # (from water splitting and not the molecule itself,
    # which can form random "peaks")
    mod_col_y2 = col_y2[ten_percent:len_y - ten_percent]
    mod_col_y1 = col_y1[ten_percent:len_y - ten_percent]

    # run the peakutils package to detect the peaks for both top and bottom
    peak_top = peakutils.indexes(mod_col_y2, thres=0.99, min_dist=20)
    peak_bottom = peakutils.indexes(abs(mod_col_y1), thres=0.99, min_dist=20)

    # determine the number of detected peaks in each half of the data
    len_top = len(peak_top)
    len_bot = len(peak_bottom)

    # append the values to the storage list,
    # adding the ten_percent offset back
    # (the indices were shifted by the trimming above)
    # so that they point at the actual peaks, not the modified values
    index_list.append(peak_top[int(len_top / 2)] + ten_percent)
    index_list.append(peak_bottom[int(len_bot / 2)] + ten_percent)

    # return storage list:
    # first value is the top, second value is the bottom
    return index_list


def peak_values(DataFrame_x, DataFrame_y):
    """Outputs x (potentials) and y (currents) values from the data indices
       given by the peak_detection_fxn function.

       ----------
       Parameters
       ----------
       DataFrame_x : should be in the form of a pandas DataFrame column.
         For example, df['potentials'] could be input as the column of x
         data.

       DataFrame_y : should be in the form of a pandas DataFrame column.
         For example, df['currents'] could be input as the column of y
         data.

       Returns
       -------
       Result : numpy array of coordinates at the peaks in the following
         order: potential of peak on top curve, current of peak on top curve,
         potential of peak on bottom curve, current of peak on bottom curve."""
    index = peak_detection_fxn(DataFrame_y)
    potential1, potential2 = split(DataFrame_x)
    current1, current2 = split(DataFrame_y)
    Peak_values = []
    Peak_values.append(potential2[index[0]])  # TOPX (the bottom part of the
    # curve is the first part of the DataFrame)
    Peak_values.append(current2[index[0]])  # TOPY
    Peak_values.append(potential1[index[1]])  # BOTTOMX
    Peak_values.append(current1[index[1]])  # BOTTOMY
    Peak_array = np.array(Peak_values)
    return Peak_array


def del_potential(DataFrame_x, DataFrame_y):
    """Outputs the difference in potentials between the anodic and
       cathodic peaks in cyclic voltammetry data.

       Parameters
       ----------
       DataFrame_x : should be in the form of a pandas DataFrame column.
         For example, df['potentials'] could be input as the column of x
         data.

       DataFrame_y : should be in the form of a pandas DataFrame column.
         For example, df['currents'] could be input as the column of y
         data.

       Returns
       -------
       Results: difference in peak potentials as a floating point number."""
    peaks = peak_values(DataFrame_x, DataFrame_y)
    del_potentials = peaks[0] - peaks[2]
    return del_potentials


def half_wave_potential(DataFrame_x, DataFrame_y):
    """Outputs the half-wave potential (redox potential) from cyclic
       voltammetry data.

       Parameters
       ----------
       DataFrame_x : should be in the form of a pandas DataFrame column.
         For example, df['potentials'] could be input as the column of x
         data.

       DataFrame_y : should be in the form of a pandas DataFrame column.
         For example, df['currents'] could be input as the column of y
         data.

       Returns
       -------
       Results : the half-wave potential in the form of a floating point
         number."""
    return del_potential(DataFrame_x, DataFrame_y) / 2


def peak_heights(DataFrame_x, DataFrame_y):
    """Outputs the heights of the maximum and minimum peaks from cyclic
       voltammetry data.

       Parameters
       ----------
       DataFrame_x : should be in the form of a pandas DataFrame column.
         For example, df['potentials'] could be input as the column of x
         data.

       DataFrame_y : should be in the form of a pandas DataFrame column.
         For example, df['currents'] could be input as the column of y
         data.

       Returns
       -------
       Results: height of the maximum peak and height of the minimum peak,
         in that order, in the form of a list."""
    current_max = peak_values(DataFrame_x, DataFrame_y)[1]
    current_min = peak_values(DataFrame_x, DataFrame_y)[3]
    x1, x2 = split(DataFrame_x)
    y1, y2 = split(DataFrame_y)
    line_at_min = linear_background(x1, y1)[peak_detection_fxn(DataFrame_y)[1]]
    line_at_max = linear_background(x2, y2)[peak_detection_fxn(DataFrame_y)[0]]
    height_of_max = current_max - line_at_max
    height_of_min = abs(current_min - line_at_min)
    return [height_of_max, height_of_min]


def peak_ratio(DataFrame_x, DataFrame_y):
    """Outputs the peak ratio from cyclic voltammetry data.

       Parameters
       ----------
       DataFrame_x : should be in the form of a pandas DataFrame column.
         For example, df['potentials'] could be input as the column of x
         data.

       DataFrame_y : should be in the form of a pandas DataFrame column.
         For example, df['currents'] could be input as the column of y
         data.

       Returns
       -------
       Result : returns a floating point number, the peak ratio."""
    heights = peak_heights(DataFrame_x, DataFrame_y)
    ratio = heights[0] / heights[1]
    return ratio
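As a design note on the baseline fit: the hand-rolled least-squares in sum_mean, multiplica and linear_coeff computes the standard slope and intercept of a best-fit line, and the same coefficients are available in one call from numpy. The sketch below restates the closed-form fit with numpy reductions (the data values are invented for illustration) and checks it against np.polyfit:

```python
import numpy as np


def linear_coeff(x, y):
    # Closed-form least squares, equivalent to the module's
    # sum_mean/multiplica construction: m = (Σxy − Σx·ȳ) / (Σx² − Σx·x̄)
    m = (np.sum(x * y) - np.sum(x) * np.mean(y)) / \
        (np.sum(x * x) - np.sum(x) * np.mean(x))
    b = np.mean(y) - m * np.mean(x)
    return m, b


# Illustrative data: an exact line, so both fits must recover m=2, b=1.
x = np.array([0.0, 1.0, 2.0, 3.0])
y = 2.0 * x + 1.0

m, b = linear_coeff(x, y)
m_np, b_np = np.polyfit(x, y, 1)  # degree-1 polynomial fit
```

Replacing the Python loops with these vectorized calls (or with np.polyfit directly) would shrink three of the flagged functions to one and reduce the module's complexity score.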