diff_classifier.knotlets (rating: A)

Complexity
    Total Complexity: 22

Size/Duplication
    Total Lines: 466
    Duplicated Lines: 72.96 %

Importance
    Changes: 0
Metric  Value
eloc    176
dl      340
loc     466
rs      10
c       0
b       0
f       0
wmc     22

5 Functions

Rating  Name                Duplication  Size  Complexity
A       tracking()          73           73    3
A       split()             64           64    2
A       split_track_msds()  54           54    5
C       assemble_msds()     107          107   8
A       geomean_msd()       42           42    4

How to fix: Duplicated Code

Duplicate code is one of the most pungent code smells. A rule of thumb that is often used is to restructure code once it is duplicated in three or more places.


'''Functions to submit tracking jobs to AWS Batch with Cloudknot

This is a set of custom functions for use with Cloudknot for parallelized
multi-particle tracking workflows. These can also be used as templates if
users want to build their own parallelized workflows. See the Cloudknot
documentation at https://richford.github.io/cloudknot/documentation.html
for more information.

The base set of functions is split, tracking, and assemble_msds. The split
function splits large images into smaller images that are manageable for a
single EC2 instance. The tracking function tracks nanoparticle trajectories in
a single sub-image from the split function. The assemble_msds function operates
on all sub-image trajectory csv files from the tracking function, calculates
MSDs and features, and assembles them into a single msd csv file and a single
features csv file. The workflow looks something like this:

                  |-track---|
                  |-track---|
(image) -split----|         |--assemble_msds-----> (msd/feature files)
                  |-track---|
                  |-track---|

'''
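
# A minimal driver sketch (not part of the original module): one way to run
# this workflow in parallel with Cloudknot. All bucket, folder, and prefix
# names here are hypothetical placeholders, and the Knot/map call signatures
# should be checked against your installed Cloudknot version.
#
#     import cloudknot as ck
#     import diff_classifier.knotlets as kn
#
#     prefix, remote_folder, bucket = 'sample', 'my_folder', 'my-bucket'
#     kn.split(prefix, remote_folder, bucket)
#
#     # tparams as sketched after the tracking() function below.
#     subprefixes = ['{}_{}_{}'.format(prefix, i, j)
#                    for i in range(4) for j in range(4)]
#     knot = ck.Knot(name='dc-tracking', func=kn.tracking)
#     futures = knot.map([(sub, remote_folder, bucket, tparams)
#                         for sub in subprefixes], starmap=True)
#
#     # After all tracking jobs finish:
#     kn.assemble_msds(prefix, remote_folder, bucket)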


def split(prefix, remote_folder, bucket,
          rows=4, cols=4, ores=(2048, 2048), ires=(512, 512)):
    '''Splits input image file into smaller images.

    A function based on imagej.partition_im that downloads an image from an S3
    bucket, splits it into smaller images, and uploads these to S3. Designed to
    work with Cloudknot for parallelizable workflows. Typically, this function
    is used in conjunction with kn.tracking and kn.assemble_msds for a complete
    analysis.

    Parameters
    ----------
    prefix : string
        Prefix (everything except file extension and folder name) of image file
        to be tracked. Must be available on S3.
    remote_folder : string
        Folder name where file is contained on S3 in the bucket specified by
        'bucket'.
    bucket : string
        S3 bucket where file is contained.
    rows : int
        Number of rows to split image into.
    cols : int
        Number of columns to split image into.
    ores : tuple of int
        Original resolution of input image.
    ires : tuple of int
        Resolution of split images. Really just a sanity check to make sure
        you are splitting correctly.

    '''

    import os
    import diff_classifier.aws as aws
    import diff_classifier.imagej as ij

    local_folder = os.getcwd()
    filename = '{}.tif'.format(prefix)
    remote_name = remote_folder+'/'+filename
    local_name = local_folder+'/'+filename
    aws.download_s3(remote_name, local_name, bucket_name=bucket)

    # Splitting section: partition_im writes the sub-images to disk and
    # returns their filenames.
    names = ij.partition_im(local_name, irows=rows, icols=cols,
                            ores=ores, ires=ires)

    # Upload each sub-image to S3, then delete the local copy.
    for name in names:
        aws.upload_s3(name, remote_folder+'/'+name, bucket_name=bucket)
        os.remove(name)
        print("Done with splitting. Should output file of name {}".format(
              remote_folder+'/'+name))

    os.remove(filename)
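
# Example call (hypothetical names): split a 2048x2048 image stored at
# s3://my-bucket/my_folder/sample.tif into a 4x4 grid of 512x512 sub-images,
# uploaded back to S3 as sample_0_0.tif through sample_3_3.tif.
#
#     split('sample', 'my_folder', 'my-bucket',
#           rows=4, cols=4, ores=(2048, 2048), ires=(512, 512))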


def tracking(subprefix, remote_folder, bucket, tparams,
             regress_f='regress.obj', rows=4, cols=4, ires=(512, 512)):
    '''Tracks particles in input image using Trackmate.

    A function based on imagej.track that downloads the image from S3, tracks
    particles using Trackmate, and uploads the resulting trajectory file to S3.
    Designed to work with Cloudknot for parallelizable workflows. Typically,
    this function is used in conjunction with kn.split and kn.assemble_msds for
    a complete analysis.

    Parameters
    ----------
    subprefix : string
        Prefix (everything except file extension and folder name) of image file
        to be tracked. Must be available on S3.
    remote_folder : string
        Folder name where file is contained on S3 in the bucket specified by
        'bucket'.
    bucket : string
        S3 bucket where file is contained.
    tparams : dict
        Dictionary containing tracking parameters to Trackmate analysis.
    regress_f : string
        Name of regress object used to predict quality parameter.
    rows : int
        Number of rows to split image into.
    cols : int
        Number of columns to split image into.
    ires : tuple of int
        Resolution of split images. Really just a sanity check to make sure
        you are splitting correctly.

    '''

    import os
    import os.path as op
    from sklearn.externals import joblib
    import diff_classifier.aws as aws
    import diff_classifier.imagej as ij

    local_folder = os.getcwd()
    outfile = 'Traj_' + subprefix + '.csv'
    local_im = op.join(local_folder, '{}.tif'.format(subprefix))
    row = int(subprefix.split('_')[-2])

    # Download the regression object and use it to predict the Trackmate
    # quality threshold for this sub-image.
    aws.download_s3(remote_folder+'/'+regress_f, regress_f, bucket_name=bucket)
    with open(regress_f, 'rb') as fp:
        regress = joblib.load(fp)

    aws.download_s3('{}/{}'.format(remote_folder,
                    '{}.tif'.format(subprefix)),
                    local_im, bucket_name=bucket)
    tparams['quality'] = ij.regress_tracking_params(
        regress, subprefix, regmethod='PassiveAggressiveRegressor')

    # Sub-images in the bottom row of the grid use a reduced y-range.
    if row == rows-1:
        tparams['ydims'] = (tparams['ydims'][0], ires[1] - 27)

    ij.track(local_im, outfile, template=None, fiji_bin=None,
             tparams=tparams)
    aws.upload_s3(outfile, remote_folder+'/'+outfile, bucket_name=bucket)
    print("Done with tracking. Should output file of name {}".format(
          remote_folder+'/'+outfile))
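
# Example tparams dict (illustrative values only): the keys below mirror the
# ij.track keyword arguments swept in the commented-out sensitivity_it
# example near the end of this file, plus the 'ydims' entry adjusted above.
# Check diff_classifier.imagej.track for the exact set of supported keys.
#
#     tparams = {'radius': 4.5, 'threshold': 0.0,
#                'do_median_filtering': True, 'quality': 4.5,
#                'ydims': (1, 511), 'median_intensity': 300.0, 'snr': 0.0,
#                'linking_max_distance': 10.0,
#                'gap_closing_max_distance': 10.0,
#                'max_frame_gap': 2, 'track_displacement': 10.0}
#
#     tracking('sample_0_0', 'my_folder', 'my-bucket', tparams)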


def assemble_msds(prefix, remote_folder, bucket,
                  ires=(512, 512), frames=651):
    '''Calculates MSDs and features from input trajectory files.

    A function based on msd.all_msds2 and features.calculate_features that
    creates msd and feature csv files from input trajectory files and uploads
    them to S3. Designed to work with Cloudknot for parallelizable workflows.
    Typically, this function is used in conjunction with kn.split and
    kn.tracking for an entire workflow.

    Parameters
    ----------
    prefix : string
        Prefix (everything except file extension and folder name) of image file
        to be tracked. Must be available on S3.
    remote_folder : string
        Folder name where file is contained on S3 in the bucket specified by
        'bucket'.
    bucket : string
        S3 bucket where file is contained.
    ires : tuple of int
        Resolution of split images. Really just a sanity check to make sure
        you are splitting correctly.
    frames : int
        Number of frames in input videos.

    '''

    import os
    import boto3
    import diff_classifier.aws as aws
    import diff_classifier.msd as msd
    import diff_classifier.features as ft
    import diff_classifier.utils as ut

    msd_file = 'msd_{}.csv'.format(prefix)
    ft_file = 'features_{}.csv'.format(prefix)

    s3 = boto3.client('s3')

    # Infer the grid dimensions from the sub-image names found on S3.
    all_objects = s3.list_objects(Bucket=bucket,
                                  Prefix='{}/{}_'.format(remote_folder,
                                                         prefix))
    names = []
    rows = 0
    cols = 0
    for entry in all_objects['Contents']:
        name = entry['Key'].split('/')[-1]
        names.append(name)
        row = int(name.split(prefix)[1].split('.')[0].split('_')[-2])
        col = int(name.split(prefix)[1].split('.')[0].split('_')[-1])
        if row > rows:
            rows = row
        if col > cols:
            cols = col
    rows = rows + 1
    cols = cols + 1

    merged = None
    for name in names:
        row = int(name.split(prefix)[1].split('.')[0].split('_')[-2])
        col = int(name.split(prefix)[1].split('.')[0].split('_')[-1])

        local_name = "Traj_{}_{}_{}.csv".format(prefix, row, col)
        aws.download_s3(remote_folder+'/'+local_name, local_name,
                        bucket_name=bucket)

        # Shift each sub-image trajectory back into the coordinate frame of
        # the original image: columns offset X, rows offset (and flip) Y.
        to_add = ut.csv_to_pd(local_name)
        to_add['X'] = to_add['X'] + ires[0]*col
        to_add['Y'] = ires[1] - to_add['Y'] + ires[1]*(rows-1-row)

        if merged is None:
            merged = msd.all_msds2(to_add, frames=frames)
        else:
            if merged.shape[0] > 0:
                to_add['Track_ID'] = to_add['Track_ID'] + \
                                     max(merged['Track_ID']) + 1
            merged = merged.append(msd.all_msds2(to_add, frames=frames))
            print('Done calculating MSDs for row {} and col {}'.format(row,
                                                                       col))

    if merged is None:
        # Fail early if no sub-image trajectory files were found, rather than
        # raising a NameError below.
        raise ValueError('No trajectory files found for prefix '
                         '{}'.format(prefix))

    merged.to_csv(msd_file)
    aws.upload_s3(msd_file, remote_folder+'/'+msd_file, bucket_name=bucket)
    merged_ft = ft.calculate_features(merged)
    merged_ft.to_csv(ft_file)
    aws.upload_s3(ft_file, remote_folder+'/'+ft_file, bucket_name=bucket)

    os.remove(ft_file)
    os.remove(msd_file)
    for name in names:
        outfile = 'Traj_' + name.split('.')[0] + '.csv'
        os.remove(outfile)
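
# Worked example of the stitching transform above, with ires=(512, 512) and
# a 4x4 grid: a point at (X, Y) = (100, 50) in sub-image (row=3, col=2) maps
# to X = 100 + 512*2 = 1124 and Y = 512 - 50 + 512*(4-1-3) = 462 in the
# assembled frame (the Y axis is flipped relative to the row index). A
# typical call (hypothetical names), after all tracking jobs have finished:
#
#     assemble_msds('sample', 'my_folder', 'my-bucket',
#                   ires=(512, 512), frames=651)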


def split_track_msds(prefix, remote_folder, bucket, tparams,
                     rows=4, cols=4, ores=(2048, 2048), ires=(512, 512),
                     to_split=False, regress_f='regress.obj', frames=651):
    '''Splits images, tracks particles, and calculates MSDs.

    A composite function designed to work with Cloudknot to split images,
    track particles, and calculate MSDs.

    Parameters
    ----------
    prefix : string
        Prefix (everything except file extension and folder name) of image file
        to be tracked. Must be available on S3.
    remote_folder : string
        Folder name where file is contained on S3 in the bucket specified by
        'bucket'.
    bucket : string
        S3 bucket where file is contained.
    tparams : dict
        Dictionary containing tracking parameters to Trackmate analysis.
    rows : int
        Number of rows to split image into.
    cols : int
        Number of columns to split image into.
    ores : tuple of int
        Original resolution of input image.
    ires : tuple of int
        Resolution of split images. Really just a sanity check to make sure
        you are splitting correctly.
    to_split : bool
        If True, will perform image splitting.
    regress_f : string
        Name of regress object used to predict quality parameter.
    frames : int
        Number of frames in input videos.

    '''

    if to_split:
        split(prefix=prefix, remote_folder=remote_folder, bucket=bucket,
              rows=rows, cols=cols, ores=ores, ires=ires)

    pref = []
    for row in range(0, rows):
        for col in range(0, cols):
            pref.append("{}_{}_{}".format(prefix, row, col))

    for subprefix in pref:
        tracking(subprefix=subprefix, remote_folder=remote_folder,
                 bucket=bucket, regress_f=regress_f, rows=rows, cols=cols,
                 ires=ires, tparams=tparams)

    assemble_msds(prefix=prefix, remote_folder=remote_folder, bucket=bucket,
                  ires=ires, frames=frames)
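
# Example end-to-end call (hypothetical names), running split, tracking, and
# MSD assembly serially on a single machine; for parallel runs, map the
# tracking step with Cloudknot as sketched at the top of this file.
#
#     split_track_msds('sample', 'my_folder', 'my-bucket', tparams,
#                      rows=4, cols=4, to_split=True)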


# def sensitivity_it(counter):
#     '''Performs sensitivity analysis on a single input image.
#
#     An example function (not designed for re-use) of a sensitivity analysis
#     that demonstrates the impact of input tracking parameters on output MSDs
#     and features.
#
#     '''
#
#     import matplotlib as mpl
#     mpl.use('Agg')
#     import matplotlib.pyplot as plt
#     import diff_classifier.aws as aws
#     import diff_classifier.utils as ut
#     import diff_classifier.msd as msd
#     import diff_classifier.features as ft
#     import diff_classifier.imagej as ij
#     import diff_classifier.heatmaps as hm
#
#     from scipy.spatial import Voronoi
#     import scipy.stats as stats
#     from shapely.geometry import Point
#     from shapely.geometry.polygon import Polygon
#     import matplotlib.cm as cm
#     import os
#     import os.path as op
#     import numpy as np
#     import numpy.ma as ma
#     import pandas as pd
#     import boto3
#     import itertools
#
#     # Sweep parameters
#     # ----------------------------------
#     radius = [4.5, 6.0, 7.0]
#     do_median_filtering = [True, False]
#     quality = [1.5, 4.5, 8.5]
#     linking_max_distance = [6.0, 10.0, 15.0]
#     gap_closing_max_distance = [6.0, 10.0, 15.0]
#     max_frame_gap = [1, 2, 5]
#     track_displacement = [0.0, 10.0, 20.0]
#
#     sweep = [radius, do_median_filtering, quality, linking_max_distance,
#              gap_closing_max_distance, max_frame_gap, track_displacement]
#     all_params = list(itertools.product(*sweep))
#
#     # Variable prep
#     # ----------------------------------
#     s3 = boto3.client('s3')
#
#     folder = '01_18_Experiment'
#     s_folder = '{}/sensitivity'.format(folder)
#     local_folder = '.'
#     prefix = "P1_S1_R_0001_2_2"
#     name = "{}.tif".format(prefix)
#     local_im = op.join(local_folder, name)
#     aws.download_s3('{}/{}/{}.tif'.format(folder, prefix.split('_')[0],
#                                           prefix),
#                     '{}.tif'.format(prefix))
#
#     outputs = np.zeros((len(all_params), len(all_params[0])+2))
#
#     # Tracking and calculations
#     # ------------------------------------
#     params = all_params[counter]
#     outfile = 'Traj_{}_{}.csv'.format(name.split('.')[0], counter)
#     msd_file = 'msd_{}_{}.csv'.format(name.split('.')[0], counter)
#     geo_file = 'geomean_{}_{}.csv'.format(name.split('.')[0], counter)
#     geoS_file = 'geoSEM_{}_{}.csv'.format(name.split('.')[0], counter)
#     msd_image = 'msds_{}_{}.png'.format(name.split('.')[0], counter)
#     iter_name = "{}_{}".format(prefix, counter)
#
#     ij.track(local_im, outfile, template=None, fiji_bin=None,
#              radius=params[0], threshold=0.,
#              do_median_filtering=params[1], quality=params[2],
#              x=511, y=511, ylo=1, median_intensity=300.0, snr=0.0,
#              linking_max_distance=params[3],
#              gap_closing_max_distance=params[4], max_frame_gap=params[5],
#              track_displacement=params[6])
#
#     traj = ut.csv_to_pd(outfile)
#     msds = msd.all_msds2(traj, frames=651)
#     msds.to_csv(msd_file)
#     gmean1, gSEM1 = hm.plot_individual_msds(iter_name, alpha=0.05)
#     np.savetxt(geo_file, gmean1, delimiter=",")
#     np.savetxt(geoS_file, gSEM1, delimiter=",")
#
#     aws.upload_s3(outfile, '{}/{}'.format(s_folder, outfile))
#     aws.upload_s3(msd_file, '{}/{}'.format(s_folder, msd_file))
#     aws.upload_s3(geo_file, '{}/{}'.format(s_folder, geo_file))
#     aws.upload_s3(geoS_file, '{}/{}'.format(s_folder, geoS_file))
#     aws.upload_s3(msd_image, '{}/{}'.format(s_folder, msd_image))
#
#     print('Successful parameter calculations for {}'.format(iter_name))


def geomean_msd(prefix, umppx=0.16, fps=100.02, upload=True,
                remote_folder="01_18_Experiment", bucket='ccurtis.data',
                backup_frames=651):
    '''Calculates geometric mean and SEM of MSDs from an msd csv file.

    Downloads the msd csv file for the input prefix from S3, computes the
    mean and standard error of the log-transformed MSDs over all particles,
    saves both as csv files, and optionally uploads them back to S3.
    '''

    import pandas as pd
    import numpy as np
    import numpy.ma as ma
    import scipy.stats as stats
    import diff_classifier.aws as aws

    aws.download_s3('{}/msd_{}.csv'.format(remote_folder, prefix),
                    'msd_{}.csv'.format(prefix), bucket_name=bucket)
    merged = pd.read_csv('msd_{}.csv'.format(prefix))
    try:
        particles = int(max(merged['Track_ID']))
        frames = int(max(merged['Frame']))
        ypos = np.zeros((particles+1, frames+1))

        # Collect the MSDs of each particle, converted to microns squared.
        for i in range(0, particles+1):
            ypos[i, :] = merged.loc[merged.Track_ID == i, 'MSDs']*umppx*umppx

        geo_mean = np.nanmean(ma.log(ypos), axis=0)
        geo_stder = ma.masked_equal(stats.sem(ma.log(ypos), axis=0,
                                              nan_policy='omit'), 0.0)

    except ValueError:
        # Fall back to NaN arrays if the msd file contains no valid tracks.
        geo_mean = np.nan*np.ones(backup_frames)
        geo_stder = np.nan*np.ones(backup_frames)

    np.savetxt('geomean_{}.csv'.format(prefix), geo_mean, delimiter=",")
    np.savetxt('geoSEM_{}.csv'.format(prefix), geo_stder, delimiter=",")

    if upload:
        aws.upload_s3('geomean_{}.csv'.format(prefix),
                      remote_folder+'/'+'geomean_{}.csv'.format(prefix),
                      bucket_name=bucket)
        aws.upload_s3('geoSEM_{}.csv'.format(prefix),
                      remote_folder+'/'+'geoSEM_{}.csv'.format(prefix),
                      bucket_name=bucket)

    return geo_mean, geo_stder
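
# Example follow-up (hypothetical names): compute the averages, then plot the
# geometric mean MSD with its standard error band. Note that geo_mean and
# geo_stder are in log space, so they are exponentiated before plotting.
#
#     import numpy as np
#     import matplotlib.pyplot as plt
#
#     geo_mean, geo_stder = geomean_msd('sample', umppx=0.16, fps=100.02,
#                                       remote_folder='my_folder',
#                                       bucket='my-bucket', upload=False)
#     times = np.arange(len(geo_mean))/100.02
#     plt.plot(times, np.exp(geo_mean))
#     plt.fill_between(times, np.exp(geo_mean - geo_stder),
#                      np.exp(geo_mean + geo_stder), alpha=0.3)
#     plt.xlabel('Time (s)')
#     plt.ylabel('Geometric mean MSD (um^2)')
#     plt.savefig('geomean_sample.png')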