| Total Complexity | 64 |
| Total Lines | 1047 |
| Duplicated Lines | 90.74 % |
| Changes | 0 |
Duplicate code is one of the most pungent code smells. A rule that is often used is to restructure code once it is duplicated in three or more places.
Common duplication problems and their corresponding solutions are:
Complex classes like diff_classifier.msd often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields and methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a subclass, Extract Subclass is also a candidate, and is often faster. The same idea applies within a single module: repeated statements can be pulled into a shared helper function, as the sketch below shows.
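In this module, the clearest instance is the five-way interp1d construction that appears twice in make_xyarray (and in reduced form in msd_calc). A minimal sketch of extracting it into a helper, with interpolate_track as a hypothetical name:

import numpy as np
from scipy import interpolate


def interpolate_track(track, new_frame, columns=('X', 'Y', 'Quality',
                                                 'SN_Ratio',
                                                 'Mean_Intensity')):
    # Interpolate each requested column onto the common frame grid,
    # padding frames outside the observed range with NaNs.
    old_frame = track['Frame'].values.astype(float)
    out = []
    for col in columns:
        fcol = interpolate.interp1d(old_frame, track[col].values,
                                    bounds_error=False, fill_value=np.nan)
        out.append(fcol(new_frame))
    return out

Both call sites in make_xyarray would then reduce to a single call each, shrinking the duplicated-line count above. The full module as analyzed follows.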
1 | """Functions to calculate mean squared displacements from trajectory data |
||
2 | |||
3 | This module includes functions to calculate mean squared displacements and |
||
4 | additional measures from input trajectory datasets as calculated by the |
||
5 | Trackmate ImageJ plugin. |
||
6 | |||
7 | """ |
||
8 | import warnings |
||
9 | import random as rand |
||
10 | |||
11 | import pandas as pd |
||
12 | import numpy as np |
||
13 | import numpy.ma as ma |
||
14 | import scipy.stats as stats |
||
15 | from scipy import interpolate |
||
16 | import matplotlib.pyplot as plt |
||
17 | from matplotlib.pyplot import cm |
||
18 | import diff_classifier.aws as aws |
||
19 | from scipy.ndimage.morphology import distance_transform_edt as eudist |
||
20 | |||
21 | |||
def nth_diff(dataframe, n=1, axis=0):
    """Calculates the nth difference between vector elements

    Returns a new vector of size N - n containing the nth difference between
    vector elements.

    Parameters
    ----------
    dataframe : pandas.core.series.Series of int or float
        Input data on which differences are to be calculated.
    n : int
        Function calculates xpos(i) - xpos(i - n) for all values in pandas
        series.
    axis : {0, 1}
        Axis along which differences are to be calculated for 2D numpy array
        input. Default is 0. Ignored for 1D pandas series input.

    Returns
    -------
    diff : pandas.core.series.Series of int or float
        Pandas series of size N - n, where N is the original size of
        dataframe.

    Examples
    --------
    >>> df = np.ones((5, 10))
    >>> nth_diff(df, axis=1)
    array([[0., 0., 0., 0., 0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0., 0., 0., 0., 0.]])

    >>> df = np.ones((5, 10))
    >>> nth_diff(df)
    array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])

    """

    assert isinstance(n, int), "n must be an integer."

    if dataframe.ndim == 1:
        length = dataframe.shape[0]
        if n <= length:
            test1 = dataframe[:-n].reset_index(drop=True)
            test2 = dataframe[n:].reset_index(drop=True)
            diff = test2 - test1
        else:
            diff = np.array([np.nan, np.nan])
    else:
        length = dataframe.shape[0]
        if n <= length:
            if axis == 0:
                test1 = dataframe[:-n, :]
                test2 = dataframe[n:, :]
            else:
                test1 = dataframe[:, :-n]
                test2 = dataframe[:, n:]
            diff = test2 - test1
        else:
            diff = np.array([np.nan, np.nan])

    return diff


def msd_calc(track, length=10):
    """Calculates mean squared displacement of input track.

    Returns a pandas dataframe containing MSD data calculated from an
    individual track.

    Parameters
    ----------
    track : pandas.core.frame.DataFrame
        Contains, at a minimum, a 'Frame', 'X', and 'Y' column.
    length : int
        Number of frames in the output track. Missing frames are filled in
        with NaNs.

    Returns
    -------
    new_track : pandas.core.frame.DataFrame
        Similar to input track. All missing frames of individual trajectories
        are filled in with NaNs, and two new columns, MSDs and Gauss, are
        added:
        MSDs, calculated mean squared displacements using the formula
        MSD = <(xpos-x0)**2>
        Gauss, calculated Gaussianity

    Examples
    --------
    >>> data1 = {'Frame': [1, 2, 3, 4, 5],
    ...          'X': [5, 6, 7, 8, 9],
    ...          'Y': [6, 7, 8, 9, 10]}
    >>> df = pd.DataFrame(data=data1)
    >>> new_track = msd.msd_calc(df, 5)

    >>> data1 = {'Frame': [1, 2, 3, 4, 5],
    ...          'X': [5, 6, 7, 8, 9],
    ...          'Y': [6, 7, 8, 9, 10]}
    >>> df = pd.DataFrame(data=data1)
    >>> new_track = msd.msd_calc(df)

    """

    meansd = np.zeros(length)
    gauss = np.zeros(length)
    new_frame = np.linspace(1, length, length)
    old_frame = track['Frame']
    oldxy = [track['X'], track['Y']]
    fxy = [interpolate.interp1d(old_frame, oldxy[0], bounds_error=False,
                                fill_value=np.nan),
           interpolate.interp1d(old_frame, oldxy[1], bounds_error=False,
                                fill_value=np.nan)]

    # masked_invalid masks the NaN fill values; masked_equal(x, np.nan)
    # would mask nothing, since NaN never compares equal to itself.
    intxy = [ma.masked_invalid(fxy[0](new_frame)),
             ma.masked_invalid(fxy[1](new_frame))]
    data1 = {'Frame': new_frame,
             'X': intxy[0],
             'Y': intxy[1]
             }
    new_track = pd.DataFrame(data=data1)

    for frame in range(0, length-1):
        xy = [np.square(nth_diff(new_track['X'], n=frame+1)),
              np.square(nth_diff(new_track['Y'], n=frame+1))]
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=RuntimeWarning)
            meansd[frame+1] = np.nanmean(xy[0] + xy[1])
            gauss[frame+1] = np.nanmean(xy[0]**2 + xy[1]**2
                                        )/(2*(meansd[frame+1]**2))

    new_track['MSDs'] = pd.Series(meansd, index=new_track.index)
    new_track['Gauss'] = pd.Series(gauss, index=new_track.index)

    return new_track


def all_msds(data):
    """Calculates mean squared displacements of a trajectory dataset

    Returns a pandas dataframe containing MSD data of all tracks in a
    trajectory dataset.

    Parameters
    ----------
    data : pandas.core.frame.DataFrame
        Contains, at a minimum, a 'Frame', 'Track_ID', 'X', and
        'Y' column. Note: this function assumes that frame numbering begins
        at 1, not 0. Adjust before feeding into function.

    Returns
    -------
    new_data : pandas.core.frame.DataFrame
        Similar to input data. All missing frames of individual trajectories
        are filled in with NaNs, and two new columns, MSDs and Gauss, are
        added:
        MSDs, calculated mean squared displacements using the formula
        MSD = <(xpos-x0)**2>
        Gauss, calculated Gaussianity

    Examples
    --------
    >>> data1 = {'Frame': [1, 2, 3, 4, 5, 1, 2, 3, 4, 5],
    ...          'Track_ID': [1, 1, 1, 1, 1, 2, 2, 2, 2, 2],
    ...          'X': [5, 6, 7, 8, 9, 1, 2, 3, 4, 5],
    ...          'Y': [6, 7, 8, 9, 10, 2, 3, 4, 5, 6]}
    >>> df = pd.DataFrame(data=data1)
    >>> all_msds(df)

    """

    trackids = data.Track_ID.unique()
    partcount = trackids.shape[0]
    length = int(max(data['Frame']))
    new = {}
    new['length'] = partcount*length
    new['frame'] = np.zeros(new['length'])
    new['ID'] = np.zeros(new['length'])
    new['xy'] = [np.zeros(new['length']),
                 np.zeros(new['length'])]
    meansd = np.zeros(new['length'])
    gauss = np.zeros(new['length'])

    for particle in range(0, partcount):
        single_track = data.loc[data['Track_ID'] ==
                                trackids[particle]
                                ].sort_values(['Track_ID', 'Frame'],
                                              ascending=[1, 1]
                                              ).reset_index(drop=True)
        if particle == 0:
            index1 = 0
            index2 = length
        else:
            index1 = index2
            index2 = index2 + length
        new['single_track'] = msd_calc(single_track, length=length)
        new['frame'][index1:index2] = np.linspace(1, length, length)
        new['ID'][index1:index2] = particle+1
        new['xy'][0][index1:index2] = new['single_track']['X']
        new['xy'][1][index1:index2] = new['single_track']['Y']
        meansd[index1:index2] = new['single_track']['MSDs']
        gauss[index1:index2] = new['single_track']['Gauss']

    data1 = {'Frame': new['frame'],
             'Track_ID': new['ID'],
             'X': new['xy'][0],
             'Y': new['xy'][1],
             'MSDs': meansd,
             'Gauss': gauss}
    new_data = pd.DataFrame(data=data1)

    return new_data


def make_xyarray(data, length=651):
    """Rearranges xy position data into 2d arrays

    Rearranges xy data from input pandas dataframe into 2D numpy array.

    Parameters
    ----------
    data : pd.core.frame.DataFrame
        Contains, at a minimum, a 'Frame', 'Track_ID', 'X', 'Y', 'Quality',
        'SN_Ratio', and 'Mean_Intensity' column.
    length : int
        Desired length or number of frames to which to extend trajectories.
        Any trajectories shorter than the input length will have the extra
        space filled in with NaNs.

    Returns
    -------
    xyft : dict of np.ndarray
        Dictionary containing position, frame, and trajectory ID data.
        Contains the following keys (each length x particles):
        farray, frames data
        tarray, trajectory ID data
        xarray, x position data
        yarray, y position data
        qarray, quality data
        snarray, signal-to-noise data
        iarray, mean intensity data

    Examples
    --------
    >>> data1 = {'Frame': [0, 1, 2, 3, 4, 2, 3, 4, 5, 6],
    ...          'Track_ID': [1, 1, 1, 1, 1, 2, 2, 2, 2, 2],
    ...          'X': [5, 6, 7, 8, 9, 1, 2, 3, 4, 5],
    ...          'Y': [6, 7, 8, 9, 10, 2, 3, 4, 5, 6]}
    >>> df = pd.DataFrame(data=data1)
    >>> length = max(df['Frame']) + 1
    >>> xyft = msd.make_xyarray(df, length=length)
    {'farray': array([[0., 0.],
                      [1., 1.],
                      [2., 2.],
                      [3., 3.],
                      [4., 4.],
                      [5., 5.],
                      [6., 6.]]),
     'tarray': array([[1., 2.],
                      [1., 2.],
                      [1., 2.],
                      [1., 2.],
                      [1., 2.],
                      [1., 2.],
                      [1., 2.]]),
     'xarray': array([[ 5., nan],
                      [ 6., nan],
                      [ 7.,  1.],
                      [ 8.,  2.],
                      [ 9.,  3.],
                      [nan,  4.],
                      [nan,  5.]]),
     'yarray': array([[ 6., nan],
                      [ 7., nan],
                      [ 8.,  2.],
                      [ 9.,  3.],
                      [10.,  4.],
                      [nan,  5.],
                      [nan,  6.]])}

    """

    # Initial values
    first_p = int(min(data['Track_ID']))
    particles = int(max(data['Track_ID'])) - first_p + 1
    xyft = {}
    xyft['xarray'] = np.zeros((length, particles))
    xyft['yarray'] = np.zeros((length, particles))
    xyft['farray'] = np.zeros((length, particles))
    xyft['tarray'] = np.zeros((length, particles))
    xyft['qarray'] = np.zeros((length, particles))
    xyft['snarray'] = np.zeros((length, particles))
    xyft['iarray'] = np.zeros((length, particles))

    track = data[data['Track_ID'] == first_p
                 ].sort_values(['Track_ID', 'Frame'],
                               ascending=[1, 1]).reset_index(drop=True)
    new_frame = np.linspace(0, length-1, length)

    old_frame = track['Frame'].values.astype(float)
    oldxy = [track['X'].values,
             track['Y'].values,
             track['Quality'].values,
             track['SN_Ratio'].values,
             track['Mean_Intensity'].values]
    fxy = [interpolate.interp1d(old_frame, oldxy[0], bounds_error=False,
                                fill_value=np.nan),
           interpolate.interp1d(old_frame, oldxy[1], bounds_error=False,
                                fill_value=np.nan),
           interpolate.interp1d(old_frame, oldxy[2], bounds_error=False,
                                fill_value=np.nan),
           interpolate.interp1d(old_frame, oldxy[3], bounds_error=False,
                                fill_value=np.nan),
           interpolate.interp1d(old_frame, oldxy[4], bounds_error=False,
                                fill_value=np.nan)]

    intxy = [fxy[0](new_frame), fxy[1](new_frame), fxy[2](new_frame),
             fxy[3](new_frame), fxy[4](new_frame)]

    # Fill in entire array
    xyft['xarray'][:, 0] = intxy[0]
    xyft['yarray'][:, 0] = intxy[1]
    xyft['farray'][:, 0] = new_frame
    xyft['tarray'][:, 0] = first_p
    xyft['qarray'][:, 0] = intxy[2]
    xyft['snarray'][:, 0] = intxy[3]
    xyft['iarray'][:, 0] = intxy[4]

    for part in range(first_p+1, first_p+particles):
        track = data[data['Track_ID'] == part
                     ].sort_values(['Track_ID', 'Frame'],
                                   ascending=[1, 1]).reset_index(drop=True)

        old_frame = track['Frame']
        oldxy = [track['X'].values,
                 track['Y'].values,
                 track['Quality'].values,
                 track['SN_Ratio'].values,
                 track['Mean_Intensity'].values]

        fxy = [interpolate.interp1d(old_frame, oldxy[0], bounds_error=False,
                                    fill_value=np.nan),
               interpolate.interp1d(old_frame, oldxy[1], bounds_error=False,
                                    fill_value=np.nan),
               interpolate.interp1d(old_frame, oldxy[2], bounds_error=False,
                                    fill_value=np.nan),
               interpolate.interp1d(old_frame, oldxy[3], bounds_error=False,
                                    fill_value=np.nan),
               interpolate.interp1d(old_frame, oldxy[4], bounds_error=False,
                                    fill_value=np.nan)]

        intxy = [fxy[0](new_frame), fxy[1](new_frame), fxy[2](new_frame),
                 fxy[3](new_frame), fxy[4](new_frame)]

        xyft['xarray'][:, part-first_p] = intxy[0]
        xyft['yarray'][:, part-first_p] = intxy[1]
        xyft['farray'][:, part-first_p] = new_frame
        xyft['tarray'][:, part-first_p] = part
        xyft['qarray'][:, part-first_p] = intxy[2]
        xyft['snarray'][:, part-first_p] = intxy[3]
        xyft['iarray'][:, part-first_p] = intxy[4]

    return xyft


def all_msds2(data, frames=651):
    """Calculates mean squared displacements of input trajectory dataset

    Returns a pandas dataframe containing MSD data of all tracks in a
    trajectory dataset.

    Parameters
    ----------
    data : pandas.core.frame.DataFrame
        Contains, at a minimum, a 'Frame', 'Track_ID', 'X', and
        'Y' column. Note: this function assumes that frame numbering begins
        at 0.

    Returns
    -------
    new_data : pandas.core.frame.DataFrame
        Similar to input data. All missing frames of individual trajectories
        are filled in with NaNs, and two new columns, MSDs and Gauss, are
        added:
        MSDs, calculated mean squared displacements using the formula
        MSD = <(xpos-x0)**2>
        Gauss, calculated Gaussianity

    Examples
    --------
    >>> data1 = {'Frame': [0, 1, 2, 3, 4, 0, 1, 2, 3, 4],
    ...          'Track_ID': [1, 1, 1, 1, 1, 2, 2, 2, 2, 2],
    ...          'X': [5, 6, 7, 8, 9, 1, 2, 3, 4, 5],
    ...          'Y': [6, 7, 8, 9, 10, 2, 3, 4, 5, 6]}
    >>> df = pd.DataFrame(data=data1)
    >>> cols = ['Frame', 'Track_ID', 'X', 'Y', 'MSDs', 'Gauss']
    >>> length = max(df['Frame']) + 1
    >>> msd.all_msds2(df, frames=length)[cols]

    """
    if data.shape[0] > 2:
        try:
            xyft = make_xyarray(data, length=frames)
            length = xyft['xarray'].shape[0]
            particles = xyft['xarray'].shape[1]

            meansd = np.zeros((length, particles))
            gauss = np.zeros((length, particles))

            for frame in range(0, length-1):
                xpos = np.square(nth_diff(xyft['xarray'], n=frame+1))
                ypos = np.square(nth_diff(xyft['yarray'], n=frame+1))

                with warnings.catch_warnings():
                    warnings.simplefilter("ignore", category=RuntimeWarning)
                    meansd[frame+1, :] = np.nanmean(xpos + ypos, axis=0)
                    gauss[frame+1, :] = np.nanmean(xpos**2 + ypos**2, axis=0
                                                   )/(2*(meansd[frame+1]**2))

            data1 = {'Frame': xyft['farray'].flatten('F'),
                     'Track_ID': xyft['tarray'].flatten('F'),
                     'X': xyft['xarray'].flatten('F'),
                     'Y': xyft['yarray'].flatten('F'),
                     'MSDs': meansd.flatten('F'),
                     'Gauss': gauss.flatten('F'),
                     'Quality': xyft['qarray'].flatten('F'),
                     'SN_Ratio': xyft['snarray'].flatten('F'),
                     'Mean_Intensity': xyft['iarray'].flatten('F')}

            new_data = pd.DataFrame(data=data1)
        except (ValueError, IndexError):
            # Fall back to an empty dataframe with the expected columns.
            data1 = {'Frame': [],
                     'Track_ID': [],
                     'X': [],
                     'Y': [],
                     'MSDs': [],
                     'Gauss': [],
                     'Quality': [],
                     'SN_Ratio': [],
                     'Mean_Intensity': []}
            new_data = pd.DataFrame(data=data1)
    else:
        data1 = {'Frame': [],
                 'Track_ID': [],
                 'X': [],
                 'Y': [],
                 'MSDs': [],
                 'Gauss': [],
                 'Quality': [],
                 'SN_Ratio': [],
                 'Mean_Intensity': []}
        new_data = pd.DataFrame(data=data1)

    return new_data


def geomean_msdisp(prefix, umppx=0.16, fps=100.02, upload=True,
                   remote_folder="01_18_Experiment", bucket='ccurtis.data',
                   backup_frames=651):
    """Computes geometric averages of mean squared displacement datasets

    Calculates geometric averages and standard errors for MSD datasets. Might
    error out if the data are not formatted as output from all_msds2.

    Parameters
    ----------
    prefix : string
        Prefix of file name to be plotted, e.g. for features_P1.csv the
        prefix is P1.
    umppx : float
        Microns per pixel of original images.
    fps : float
        Frames per second of video.
    upload : bool
        True if you want to upload to S3.
    remote_folder : string
        Folder in S3 bucket to upload to.
    bucket : string
        Name of S3 bucket to upload to.
    backup_frames : int
        Number of frames used for the NaN-filled outputs when the input
        dataset is empty.

    Returns
    -------
    geo_mean : numpy.ndarray
        Geometric mean of trajectory MSDs at all time points.
    geo_stder : numpy.ndarray
        Geometric standard error of trajectory MSDs at all time points.

    """

    merged = pd.read_csv('msd_{}.csv'.format(prefix))
    try:
        particles = int(max(merged['Track_ID']))
        frames = int(max(merged['Frame']))
        ypos = np.zeros((particles+1, frames+1))

        for i in range(0, particles+1):
            ypos[i, :] = merged.loc[merged.Track_ID == i, 'MSDs']*umppx*umppx
            xpos = merged.loc[merged.Track_ID == i, 'Frame']/fps

        geo_mean = np.nanmean(ma.log(ypos), axis=0)
        geo_stder = ma.masked_equal(stats.sem(ma.log(ypos), axis=0,
                                              nan_policy='omit'), 0.0)

    except ValueError:
        geo_mean = np.nan*np.ones(backup_frames)
        geo_stder = np.nan*np.ones(backup_frames)

    np.savetxt('geomean_{}.csv'.format(prefix), geo_mean, delimiter=",")
    np.savetxt('geoSEM_{}.csv'.format(prefix), geo_stder, delimiter=",")

    if upload:
        aws.upload_s3('geomean_{}.csv'.format(prefix),
                      remote_folder+'/'+'geomean_{}.csv'.format(prefix),
                      bucket_name=bucket)
        aws.upload_s3('geoSEM_{}.csv'.format(prefix),
                      remote_folder+'/'+'geoSEM_{}.csv'.format(prefix),
                      bucket_name=bucket)

    return geo_mean, geo_stder


def binning(experiments, wells=4, prefix='test'):
    """Split set of input experiments into groups.

    Parameters
    ----------
    experiments : list of str
        List of experiment names.
    wells : int
        Number of groups to divide experiments into.
    prefix : str
        Prefix for the names of the resulting groups.

    Returns
    -------
    slices : int
        Number of experiments per group.
    bins : dict of list of str
        Dictionary, keys corresponding to group names, and elements containing
        lists of experiments in each group.
    bin_names : list of str
        List of group names.

    """

    total_videos = len(experiments)
    bins = {}
    slices = int(total_videos/wells)
    bin_names = []

    for num in range(0, wells):
        slice1 = num*slices
        slice2 = (num+1)*(slices)
        pref = '{}_W{}'.format(prefix, num)
        bins[pref] = experiments[slice1:slice2]
        bin_names.append(pref)
    return slices, bins, bin_names


def precision_weight(group, geo_stder):
    """Calculates precision-based weights from input standard error data

    Calculates precision weights to be used in precision-averaged MSD
    calculations.

    Parameters
    ----------
    group : list of str
        List of experiment names to average. Each element corresponds to a key
        in geo_stder and geomean.
    geo_stder : dict of numpy.ndarray
        Each entry in dictionary corresponds to the standard errors of an MSD
        profile, the key corresponding to an experiment name.

    Returns
    -------
    weights : numpy.ndarray
        Precision weights to be used in precision averaging.
    w_holder : numpy.ndarray
        Precision values of each video at each time point.

    """

    frames = np.shape(geo_stder[group[0]])[0]
    slices = len(group)
    video_counter = 0
    w_holder = np.zeros((slices, frames))
    for sample in group:
        w_holder[video_counter, :] = 1/(geo_stder[sample]*geo_stder[sample])
        video_counter = video_counter + 1

    w_holder = ma.masked_equal(w_holder, 0.0)
    w_holder = ma.masked_equal(w_holder, 1.0)

    weights = ma.sum(w_holder, axis=0)

    return weights, w_holder


def precision_averaging(group, geomean, geo_stder, weights, save=True,
                        bucket='ccurtis.data', folder='test',
                        experiment='test'):
    """Calculates precision-weighted averages of MSD datasets.

    Parameters
    ----------
    group : list of str
        List of experiment names to average. Each element corresponds to a key
        in geo_stder and geomean.
    geomean : dict of numpy.ndarray
        Each entry in dictionary corresponds to an MSD profile, the key
        corresponding to an experiment name.
    geo_stder : dict of numpy.ndarray
        Each entry in dictionary corresponds to the standard errors of an MSD
        profile, the key corresponding to an experiment name.
    weights : numpy.ndarray
        Precision weights to be used in precision averaging.
    save : bool
        True if the resulting averages should be saved and uploaded to S3.
    bucket : str
        Name of S3 bucket to upload to.
    folder : str
        Folder in S3 bucket to upload to.
    experiment : str
        Name of experiment used in output filenames.

    Returns
    -------
    geodata : Bunch
        Bunch object containing the precision-weighted averaged MSDs
        (geomean) and SEMs (geostd) from the experiments specified in group,
        plus the intermediate weight (weighthold) and standard error
        (geostdhold) arrays.

    """

    frames = np.shape(geo_stder[group[0]])[0]
    slices = len(group)

    video_counter = 0
    geo_holder = np.zeros((slices, frames))
    gstder_holder = np.zeros((slices, frames))
    w_holder = np.zeros((slices, frames))
    for sample in group:
        w_holder[video_counter, :] = (1/(geo_stder[sample]*geo_stder[sample])
                                      )/weights
        geo_holder[video_counter, :] = w_holder[video_counter, :
                                                ] * geomean[sample]
        gstder_holder[video_counter, :] = 1/(geo_stder[sample] *
                                             geo_stder[sample])
        video_counter = video_counter + 1

    w_holder = ma.masked_equal(w_holder, 0.0)
    w_holder = ma.masked_equal(w_holder, 1.0)
    geo_holder = ma.masked_equal(geo_holder, 0.0)
    geo_holder = ma.masked_equal(geo_holder, 1.0)
    gstder_holder = ma.masked_equal(gstder_holder, 0.0)
    gstder_holder = ma.masked_equal(gstder_holder, 1.0)

    geo = ma.sum(geo_holder, axis=0)
    geo_stder = ma.sqrt((1/ma.sum(gstder_holder, axis=0)))

    if save:
        geo_f = 'geomean_{}.csv'.format(experiment)
        gstder_f = 'geoSEM_{}.csv'.format(experiment)
        np.savetxt(geo_f, geo, delimiter=',')
        np.savetxt(gstder_f, geo_stder, delimiter=',')
        aws.upload_s3(geo_f, '{}/{}'.format(folder, geo_f), bucket_name=bucket)
        aws.upload_s3(gstder_f, '{}/{}'.format(folder, gstder_f),
                      bucket_name=bucket)

    geodata = Bunch(geomean=geo, geostd=geo_stder, weighthold=w_holder,
                    geostdhold=gstder_holder)

    return geodata


def plot_all_experiments(experiments, bucket='ccurtis.data', folder='test',
                         yrange=(10**-1, 10**1), fps=100.02,
                         xrange=(10**-2, 10**0), upload=True,
                         outfile='test.png', exponential=True,
                         labels=None, log=True):
    """Plots precision-weighted averages of MSD datasets.

    Plots pre-calculated precision-weighted averages of MSD datasets
    calculated from precision_averaging and stored in an AWS S3 bucket.

    Parameters
    ----------
    experiments : list of str
        List of experiment names to plot. Each experiment must have an MSD and
        SEM file associated with it in S3.
    bucket : str
        S3 bucket from which to download data.
    folder : str
        Folder in S3 bucket from which to download data.
    yrange : list of float
        Y range of plot.
    fps : float
        Frames per second of video.
    xrange : list of float
        X range of plot.
    upload : bool
        True to upload to S3.
    outfile : str
        Filename of output image.
    exponential : bool
        True to exponentiate the stored (log-scale) values before plotting.
    labels : list of str
        Legend labels. Defaults to the experiment names.
    log : bool
        True to plot on log-log axes.

    """

    n = len(experiments)

    if labels is None:
        labels = experiments

    color = iter(cm.viridis(np.linspace(0, 0.9, n)))

    fig = plt.figure(figsize=(8.5, 8.5))
    ax = fig.add_subplot(111)
    plt.xlim(xrange[0], xrange[1])
    plt.ylim(yrange[0], yrange[1])
    plt.xlabel('Tau (s)', fontsize=25)
    plt.ylabel(r'Mean Squared Displacement ($\mu$m$^2$)', fontsize=25)

    geo = {}
    gstder = {}
    counter = 0
    for experiment in experiments:
        aws.download_s3('{}/geomean_{}.csv'.format(folder, experiment),
                        'geomean_{}.csv'.format(experiment),
                        bucket_name=bucket)
        aws.download_s3('{}/geoSEM_{}.csv'.format(folder, experiment),
                        'geoSEM_{}.csv'.format(experiment),
                        bucket_name=bucket)

        geo[counter] = np.genfromtxt('geomean_{}.csv'.format(experiment))
        gstder[counter] = np.genfromtxt('geoSEM_{}.csv'.format(experiment))
        geo[counter] = ma.masked_equal(geo[counter], 0.0)
        gstder[counter] = ma.masked_equal(gstder[counter], 0.0)

        frames = np.shape(gstder[counter])[0]
        xpos = np.linspace(0, frames-1, frames)/fps
        c = next(color)

        if exponential:
            ax.plot(xpos, np.exp(geo[counter]), c=c, linewidth=6,
                    label=labels[counter])
            ax.fill_between(xpos, np.exp(geo[counter] - 1.96*gstder[counter]),
                            np.exp(geo[counter] + 1.96*gstder[counter]),
                            color=c, alpha=0.4)

        else:
            ax.plot(xpos, geo[counter], c=c, linewidth=6,
                    label=labels[counter])
            ax.fill_between(xpos, geo[counter] - 1.96*gstder[counter],
                            geo[counter] + 1.96*gstder[counter], color=c,
                            alpha=0.4)

        counter = counter + 1

    if log:
        ax.set_xscale("log")
        ax.set_yscale("log")

    plt.legend(frameon=False, loc=2, prop={'size': 16})

    if upload:
        fig.savefig(outfile, bbox_inches='tight')
        aws.upload_s3(outfile, folder+'/'+outfile, bucket_name=bucket)


def checkerboard_mask(dims=(512, 512), squares=50, width=25):
    """Creates a 2D Boolean checkerboard mask

    Creates a Boolean array of evenly spaced squares.
    Whitespace is set to True.

    Parameters
    ----------
    dims : tuple of int
        Dimensions of desired Boolean array.
    squares : int
        Dimension of an individual square in array.
    width : int
        Dimension of spacing between squares.

    Returns
    -------
    zeros : numpy.ndarray of bool
        2D Boolean array of evenly spaced squares.

    """
    zeros = np.zeros(dims) == 0
    square_d = squares

    loy = width
    hiy = loy + square_d

    # Stamp squares row by row, clipping at the array bounds given by dims.
    for j in range(50):

        lox = width
        hix = lox + square_d
        for i in range(50):

            if hix < dims[1] and hiy < dims[0]:
                zeros[loy:hiy, lox:hix] = False
            elif hix < dims[1]:
                zeros[loy:dims[0]-1, lox:hix] = False
            elif hiy < dims[0]:
                zeros[loy:hiy, lox:dims[1]-1] = False
            else:
                zeros[loy:dims[0]-1, lox:dims[1]-1] = False
                break

            lox = hix + width
            hix = lox + square_d

        loy = hiy + width
        hiy = loy + square_d

    return zeros


def random_walk(nsteps=100, seed=None, start=(0, 0), step=1, mask=None,
                stuckprob=0.5):
    """Creates 2d random walk trajectory.

    Parameters
    ----------
    nsteps : int
        Number of steps for trajectory to move.
    seed : int
        Seed for pseudo-random number generator for reproducibility.
    start : tuple of int or float
        Starting xy coordinates at which the random walk begins.
    step : int or float
        Magnitude of single step.
    mask : numpy.ndarray of bool
        Mask of barriers constraining diffusion.
    stuckprob : float
        Probability of "particle" adhering to barrier when it makes contact.

    Returns
    -------
    x : numpy.ndarray
        Array of x coordinates of random walk.
    y : numpy.ndarray
        Array of y coordinates of random walk.

    """

    if type(mask) is np.ndarray:
        # Shift the starting point until it lies inside the mask, and
        # precompute the Euclidean distance to the nearest True region.
        while not mask[start[0], start[1]]:
            start = (start[0], start[1]+1)
        eumask = eudist(~mask)

    # Seed both generators used below: `random` drives the step directions
    # and numpy drives the sticking probability.
    rand.seed(a=seed)
    np.random.seed(seed=seed)

    x = np.zeros(nsteps)
    y = np.zeros(nsteps)
    x[0] = start[0]
    y[0] = start[1]

    # Checks to see if a mask is being used first
    if not type(mask) is np.ndarray:
        for i in range(1, nsteps):
            val = rand.randint(1, 4)
            if val == 1:
                x[i] = x[i - 1] + step
                y[i] = y[i - 1]
            elif val == 2:
                x[i] = x[i - 1] - step
                y[i] = y[i - 1]
            elif val == 3:
                x[i] = x[i - 1]
                y[i] = y[i - 1] + step
            else:
                x[i] = x[i - 1]
                y[i] = y[i - 1] - step
    else:
        for i in range(1, nsteps):
            val = rand.randint(1, 4)
            # If mask is being used, checks if entry is in mask or not
            if mask[int(x[i-1]), int(y[i-1])]:
                if val == 1:
                    x[i] = x[i - 1] + step
                    y[i] = y[i - 1]
                elif val == 2:
                    x[i] = x[i - 1] - step
                    y[i] = y[i - 1]
                elif val == 3:
                    x[i] = x[i - 1]
                    y[i] = y[i - 1] + step
                else:
                    x[i] = x[i - 1]
                    y[i] = y[i - 1] - step
            # If it does cross into a False area, probability to be stuck
            elif np.random.rand() > stuckprob:
                x[i] = x[i - 1]
                y[i] = y[i - 1]

                # Walk back toward the mask along the distance gradient
                while eumask[int(x[i]), int(y[i])] > 0:
                    vals = np.zeros(4)
                    vals[0] = eumask[int(x[i] + step), int(y[i])]
                    vals[1] = eumask[int(x[i] - step), int(y[i])]
                    vals[2] = eumask[int(x[i]), int(y[i] + step)]
                    vals[3] = eumask[int(x[i]), int(y[i] - step)]
                    vali = np.argmin(vals)

                    if vali == 0:
                        x[i] = x[i] + step
                        y[i] = y[i]
                    elif vali == 1:
                        x[i] = x[i] - step
                        y[i] = y[i]
                    elif vali == 2:
                        x[i] = x[i]
                        y[i] = y[i] + step
                    else:
                        x[i] = x[i]
                        y[i] = y[i] - step
            # Otherwise, particle is stuck on "cell"
            else:
                x[i] = x[i - 1]
                y[i] = y[i - 1]

    return x, y


# def random_walk(nsteps=100, seed=1, start=(0, 0)):
#     """Creates 2d random walk trajectory.
#
#     Parameters
#     ----------
#     nsteps : int
#         Number of steps for trajectory to move.
#     seed : int
#         Seed for pseudo-random number generator for reproducibility.
#     start : tuple of int or float
#         Starting xy coordinates at which the random walk begins.
#
#     Returns
#     -------
#     x : numpy.ndarray
#         Array of x coordinates of random walk.
#     y : numpy.ndarray
#         Array of y coordinates of random walk.
#
#     """
#
#     rand.seed(a=seed)
#
#     x = np.zeros(nsteps)
#     y = np.zeros(nsteps)
#     x[0] = start[0]
#     y[0] = start[1]
#
#     for i in range(1, nsteps):
#         val = rand.randint(1, 4)
#         if val == 1:
#             x[i] = x[i - 1] + 1
#             y[i] = y[i - 1]
#         elif val == 2:
#             x[i] = x[i - 1] - 1
#             y[i] = y[i - 1]
#         elif val == 3:
#             x[i] = x[i - 1]
#             y[i] = y[i - 1] + 1
#         else:
#             x[i] = x[i - 1]
#             y[i] = y[i - 1] - 1
#
#     return x, y


def random_traj_dataset(nframes=100, nparts=30, seed=1, fsize=(0, 512),
                        ndist=(1, 2)):
    """Creates a random population of random walks.

    Parameters
    ----------
    nframes : int
        Number of frames for each random trajectory.
    nparts : int
        Number of particles in trajectory dataset.
    seed : int
        Seed for pseudo-random number generator for reproducibility.
    fsize : tuple of int or float
        Range of coordinates over which particles may start.
    ndist : tuple of int or float
        Parameters to generate normal distribution, mu and sigma.

    Returns
    -------
    dataf : pandas.core.frame.DataFrame
        Trajectory data containing a 'Frame', 'Track_ID', 'X', and
        'Y' column.

    """

    frames = []
    trackid = []
    x = []
    y = []
    start = [0, 0]
    pseed = seed

    for i in range(nparts):
        rand.seed(a=i+pseed)
        start[0] = rand.randint(fsize[0], fsize[1])
        rand.seed(a=i+3+pseed)
        start[1] = rand.randint(fsize[0], fsize[1])
        rand.seed(a=i+5+pseed)
        weight = rand.normalvariate(mu=ndist[0], sigma=ndist[1])

        trackid = np.append(trackid, np.array([i]*nframes))
        xi, yi = random_walk(nsteps=nframes, seed=i)
        x = np.append(x, weight*xi+start[0])
        y = np.append(y, weight*yi+start[1])
        frames = np.append(frames, np.linspace(0, nframes-1, nframes))

    datai = {'Frame': frames,
             'Track_ID': trackid,
             'X': x,
             'Y': y,
             'Quality': nframes*nparts*[10],
             'SN_Ratio': nframes*nparts*[0.1],
             'Mean_Intensity': nframes*nparts*[120]}
    dataf = pd.DataFrame(data=datai)

    return dataf


class Bunch:
    """Simple container that exposes keyword arguments as attributes."""

    def __init__(self, **kwds):
        self.__dict__.update(kwds)
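
For reference, a minimal end-to-end sketch of how these functions compose, assuming purely local execution (upload=False keeps the results off S3; the 'demo' prefix and filenames are illustrative):

import diff_classifier.msd as msd

# Simulate a small trajectory dataset and compute per-track MSDs.
dataset = msd.random_traj_dataset(nframes=100, nparts=10, seed=1)
msds = msd.all_msds2(dataset, frames=100)

# geomean_msdisp reads its input from disk, so write the MSDs out first.
msds.to_csv('msd_demo.csv')
geo_mean, geo_stder = msd.geomean_msdisp('demo', upload=False)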