diff_classifier.heatmaps   A
last analyzed

Complexity

Total Complexity 38

Size/Duplication

Total Lines 525
Duplicated Lines 94.48 %

Importance

Changes 0
Metric Value
eloc 272
dl 496
loc 525
rs 9.36
c 0
b 0
f 0
wmc 38

7 Functions

Rating   Name   Duplication   Size   Complexity  
B plot_heatmap() 95 95 5
C voronoi_finite_polygons_2d() 86 86 10
A plot_scatterplot() 59 59 2
C plot_histogram() 95 95 8
A plot_particles_in_frame() 35 35 3
B plot_trajectories() 46 46 5
B plot_individual_msds() 80 81 5

How to fix   Duplicated Code   

Duplicated Code

Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.

Common duplication problems, and corresponding solutions are:

1
import matplotlib as mpl
2
import numpy as np
3
import pandas as pd
4
import matplotlib.pyplot as plt
5
from scipy.spatial import Voronoi
6
import scipy.stats as stats
7
import os
8
import os.path as op
9
from shapely.geometry import Point
10
from shapely.geometry.polygon import Polygon
11
import numpy.ma as ma
12
import matplotlib.cm as cm
13
import diff_classifier.aws as aws
14
15
16 View Code Duplication
def voronoi_finite_polygons_2d(vor, radius=None):
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
17
    """
18
    Reconstruct infinite voronoi regions in a 2D diagram to finite
19
    regions.
20
21
    Parameters
22
    ----------
23
    vor : Voronoi
24
        Input diagram
25
    radius : float, optional
26
        Distance to 'points at infinity'.
27
28
    Returns
29
    -------
30
    regions : list of tuples
31
        Indices of vertices in each revised Voronoi regions.
32
    vertices : list of tuples
33
        Coordinates for revised Voronoi vertices. Same as coordinates
34
        of input vertices, with 'points at infinity' appended to the
35
        end.
36
37
    """
38
39
    if vor.points.shape[1] != 2:
40
        raise ValueError("Requires 2D input")
41
42
    new_regions = []
43
    new_vertices = vor.vertices.tolist()
44
45
    center = vor.points.mean(axis=0)
46
    if radius is None:
47
        radius = vor.points.ptp().max()
48
49
    # Construct a map containing all ridges for a given point
50
    all_ridges = {}
51
    for (p1, p2), (v1, v2) in zip(vor.ridge_points, vor.ridge_vertices):
52
        all_ridges.setdefault(p1, []).append((p2, v1, v2))
53
        all_ridges.setdefault(p2, []).append((p1, v1, v2))
54
55
    counter = 0
56
    for p1, region in enumerate(vor.point_region):
57
        try:
58
            vertices = vor.regions[region]
59
60
            if all(v >= 0 for v in vertices):
61
                # finite region
62
                new_regions.append(vertices)
63
                continue
64
65
            # reconstruct a non-finite region
66
            ridges = all_ridges[p1]
67
            new_region = [v for v in vertices if v >= 0]
68
69
            for p2, v1, v2 in ridges:
70
                if v2 < 0:
71
                    v1, v2 = v2, v1
72
                if v1 >= 0:
73
                    # finite ridge: already in the region
74
                    continue
75
76
                # Compute the missing endpoint of an infinite ridge
77
78
                t = vor.points[p2] - vor.points[p1]  # tangent
79
                t /= np.linalg.norm(t)
80
                n = np.array([-t[1], t[0]])  # normal
81
82
                midpoint = vor.points[[p1, p2]].mean(axis=0)
83
                direction = np.sign(np.dot(midpoint - center, n)) * n
84
                far_point = vor.vertices[v2] + direction * radius
85
86
                new_region.append(len(new_vertices))
87
                new_vertices.append(far_point.tolist())
88
89
            # sort region counterclockwise
90
            vs = np.asarray([new_vertices[v] for v in new_region])
91
            c = vs.mean(axis=0)
92
            angles = np.arctan2(vs[:, 1] - c[1], vs[:, 0] - c[0])
93
            new_region = np.array(new_region)[np.argsort(angles)]
94
95
            # finish
96
            new_regions.append(new_region.tolist())
97
        except KeyError:
98
            counter = counter + 1
99
            # print('Oops {}'.format(counter))
100
101
    return new_regions, np.asarray(new_vertices)
102
103
104 View Code Duplication
def plot_heatmap(prefix, feature='asymmetry1', vmin=0, vmax=1, resolution=512, rows=4, cols=4,
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
105
                 upload=True, dpi=None, figsize=(12, 10), remote_folder = "01_18_Experiment",
106
                 bucket='ccurtis.data'):
107
    """
108
    Plot heatmap of trajectories in video with colors corresponding to features.
109
110
    Parameters
111
    ----------
112
    prefix: string
113
        Prefix of file name to be plotted e.g. features_P1.csv prefix is P1.
114
    feature: string
115
        Feature to be plotted.  See features_analysis.py
116
    vmin: float64
117
        Lower intensity bound for heatmap.
118
    vmax: float64
119
        Upper intensity bound for heatmap.
120
    resolution: int
121
        Resolution of base image.  Only needed to calculate bounds of image.
122
    rows: int
123
        Rows of base images used to build tiled image.
124
    cols: int
125
        Columns of base images used to build tiled images.
126
    upload: boolean
127
        True if you want to upload to s3.
128
    dpi: int
129
        Desired dpi of output image.
130
    figsize: list
131
        Desired dimensions of output image.
132
133
    Returns
134
    -------
135
136
    """
137
    # Inputs
138
    # ----------
139
    merged_ft = pd.read_csv('features_{}.csv'.format(prefix))
140
    string = feature
141
    leveler = merged_ft[string]
142
    t_min = vmin
143
    t_max = vmax
144
    ires = resolution
145
146
    # Building points and color schemes
147
    # ----------
148
    zs = ma.masked_invalid(merged_ft[string])
149
    zs = ma.masked_where(zs <= t_min, zs)
150
    zs = ma.masked_where(zs >= t_max, zs)
151
    to_mask = ma.getmask(zs)
152
    zs = ma.compressed(zs)
153
154
    xs = ma.compressed(ma.masked_where(to_mask, merged_ft['X'].astype(int)))
155
    ys = ma.compressed(ma.masked_where(to_mask, merged_ft['Y'].astype(int)))
156
    points = np.zeros((xs.shape[0], 2))
157
    points[:, 0] = xs
158
    points[:, 1] = ys
159
    vor = Voronoi(points)
160
161
    # Plot
162
    # ----------
163
    fig = plt.figure(figsize=figsize, dpi=dpi)
164
    regions, vertices = voronoi_finite_polygons_2d(vor)
165
    my_map = cm.get_cmap('viridis')
166
    norm = mpl.colors.Normalize(t_min, t_max, clip=True)
167
    mapper = cm.ScalarMappable(norm=norm, cmap=cm.viridis)
168
169
    test = 0
170
    p2 = 0
171
    counter = 0
172
    for i in range(0, points.shape[0]-1):
0 ignored issues
show
Comprehensibility Best Practice introduced by
The variable range does not seem to be defined.
Loading history...
173
        try:
174
            polygon = vertices[regions[p2]]
175
            point1 = Point(points[test, :])
176
            poly1 = Polygon(polygon)
177
            check = poly1.contains(point1)
178
            if check:
179
                plt.fill(*zip(*polygon), color=my_map(norm(zs[test])), alpha=0.7)
180
                p2 = p2 + 1
181
                test = test + 1
182
            else:
183
                p2 = p2
184
                test = test + 1
185
        except IndexError:
186
            print('Index mismatch possible.')
187
188
    mapper.set_array(10)
189
    plt.colorbar(mapper)
190
    plt.xlim(0, ires*cols)
191
    plt.ylim(0, ires*rows)
192
    plt.axis('off')
193
194
    print('Plotted {} heatmap successfully.'.format(prefix))
195
    outfile = 'hm_{}_{}.png'.format(feature, prefix)
196
    fig.savefig(outfile, bbox_inches='tight')
197
    if upload == True:
198
        aws.upload_s3(outfile, remote_folder+'/'+outfile, bucket_name=bucket)
199
200
201 View Code Duplication
def plot_scatterplot(prefix, feature='asymmetry1', vmin=0, vmax=1, resolution=512, rows=4, cols=4,
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
202
                     dotsize=10, figsize=(12, 10), upload=True, remote_folder = "01_18_Experiment",
203
                     bucket='ccurtis.data'):
204
    """
205
    Plot scatterplot of trajectories in video with colors corresponding to features.
206
207
    Parameters
208
    ----------
209
    prefix: string
210
        Prefix of file name to be plotted e.g. features_P1.csv prefix is P1.
211
    feature: string
212
        Feature to be plotted.  See features_analysis.py
213
    vmin: float64
214
        Lower intensity bound for heatmap.
215
    vmax: float64
216
        Upper intensity bound for heatmap.
217
    resolution: int
218
        Resolution of base image.  Only needed to calculate bounds of image.
219
    rows: int
220
        Rows of base images used to build tiled image.
221
    cols: int
222
        Columns of base images used to build tiled images.
223
    upload: boolean
224
        True if you want to upload to s3.
225
226
    """
227
    # Inputs
228
    # ----------
229
    merged_ft = pd.read_csv('features_{}.csv'.format(prefix))
230
    string = feature
231
    leveler = merged_ft[string]
232
    t_min = vmin
233
    t_max = vmax
234
    ires = resolution
235
236
    norm = mpl.colors.Normalize(t_min, t_max, clip=True)
237
    mapper = cm.ScalarMappable(norm=norm, cmap=cm.viridis)
238
239
    zs = ma.masked_invalid(merged_ft[string])
240
    zs = ma.masked_where(zs <= t_min, zs)
241
    zs = ma.masked_where(zs >= t_max, zs)
242
    to_mask = ma.getmask(zs)
243
    zs = ma.compressed(zs)
244
    xs = ma.compressed(ma.masked_where(to_mask, merged_ft['X'].astype(int)))
245
    ys = ma.compressed(ma.masked_where(to_mask, merged_ft['Y'].astype(int)))
246
247
    fig = plt.figure(figsize=figsize)
248
    plt.scatter(xs, ys, c=zs, s=dotsize)
249
    mapper.set_array(10)
250
    plt.colorbar(mapper)
251
    plt.xlim(0, ires*cols)
252
    plt.ylim(0, ires*rows)
253
    plt.axis('off')
254
255
    print('Plotted {} scatterplot successfully.'.format(prefix))
256
    outfile = 'scatter_{}_{}.png'.format(feature, prefix)
257
    fig.savefig(outfile, bbox_inches='tight')
258
    if upload == True:
259
        aws.upload_s3(outfile, remote_folder+'/'+outfile, bucket_name=bucket)
260
261
262 View Code Duplication
def plot_trajectories(prefix, resolution=512, rows=4, cols=4, upload=True,
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
263
                      remote_folder = "01_18_Experiment", bucket='ccurtis.data',
264
                      figsize=(12, 12), subset=True, size=1000):
265
    """
266
    Plot trajectories in video.
267
268
    Parameters
269
    ----------
270
    prefix: string
271
        Prefix of file name to be plotted e.g. features_P1.csv prefix is P1.
272
    resolution: int
273
        Resolution of base image.  Only needed to calculate bounds of image.
274
    rows: int
275
        Rows of base images used to build tiled image.
276
    cols: int
277
        Columns of base images used to build tiled images.
278
    upload: boolean
279
        True if you want to upload to s3.
280
281
    """
282
    merged = pd.read_csv('msd_{}.csv'.format(prefix))
283
    particles = int(max(merged['Track_ID']))
284
    if particles < size:
285
        size = particles - 1
286
    else:
287
        pass
288
    particles = np.linspace(0, particles, particles-1).astype(int)
289
    if subset:
290
        particles = np.random.choice(particles, size=size, replace=False)
291
    ires = resolution
292
293
    fig = plt.figure(figsize=figsize)
294
    for part in particles:
295
        x = merged[merged['Track_ID'] == part]['X']
296
        y = merged[merged['Track_ID'] == part]['Y']
297
        plt.plot(x, y, color='k', alpha=0.7)
298
299
    plt.xlim(0, ires*cols)
300
    plt.ylim(0, ires*rows)
301
    plt.axis('off')
302
303
    print('Plotted {} trajectories successfully.'.format(prefix))
304
    outfile = 'traj_{}.png'.format(prefix)
305
    fig.savefig(outfile, bbox_inches='tight')
306
    if upload:
307
        aws.upload_s3(outfile, remote_folder+'/'+outfile, bucket_name=bucket)
308
309
310 View Code Duplication
def plot_histogram(prefix, xlabel='Log Diffusion Coefficient Dist', ylabel='Trajectory Count',
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
311
                   fps=100.02, umppx=0.16, frames=651, y_range=100, frame_interval=20, frame_range=100,
312
                   analysis='log', theta='D', upload=True, remote_folder = "01_18_Experiment",
313
                   bucket='ccurtis.data'):
314
    """
315
    Plot heatmap of trajectories in video with colors corresponding to features.
316
317
    Parameters
318
    ----------
319
    prefix: string
320
        Prefix of file name to be plotted e.g. features_P1.csv prefix is P1.
321
    xlabel: string
322
        X axis label.
323
    ylabel: string
324
        Y axis label.
325
    fps: float64
326
        Frames per second of video.
327
    umppx: float64
328
        Resolution of video in microns per pixel.
329
    frames: int
330
        Number of frames in video.
331
    y_range: float64 or int
332
        Desire y range of graph.
333
    frame_interval: int
334
        Desired spacing between MSDs/Deffs to be plotted.
335
    analysis: string
336
        Desired output format.  If log, will plot log(MSDs/Deffs)
337
    theta: string
338
        Desired output.  D for diffusion coefficients.  Anything else, MSDs.
339
    upload: boolean
340
        True if you want to upload to s3.
341
342
    """
343
    merged = pd.read_csv('msd_{}.csv'.format(prefix))
344
    data = merged
345
    frame_range = range(frame_interval, frame_range+frame_interval, frame_interval)
0 ignored issues
show
Comprehensibility Best Practice introduced by
The variable range does not seem to be defined.
Loading history...
346
347
    # load data
348
349
    # generate keys for legend
350
    bar = {}
351
    keys = []
352
    entries = []
353
    for i in range(0, len(list(frame_range))):
354
        keys.append(i)
355
        entries.append(str(10*frame_interval*(i+1)) + 'ms')
356
357
    set_x_limit = False
358
    set_y_limit = True
359
    colors = plt.rcParams['axes.prop_cycle'].by_key()['color']
360
    fig = plt.figure(figsize=(16, 6))
361
362
    counter = 0
363
    for i in frame_range:
364
        toi = i/fps
365
        if theta == "MSD":
366
            factor = 1
367
        else:
368
            factor = 4*toi
369
370
        if analysis == 'log':
371
            dist = np.log(umppx*umppx*merged.loc[merged.Frame == i, 'MSDs'].dropna()/factor)
372
            test_bins = np.linspace(-5, 5, 76)
373
        else:
374
            dist = umppx*umppx*merged.loc[merged.Frame == i, 'MSDs'].dropna()/factor
375
            test_bins = np.linspace(0, 20, 76)
376
377
        histogram, test_bins = np.histogram(dist, bins=test_bins)
378
379
        # Plot_general_histogram_code
380
        avg = np.mean(dist)
381
382
        plt.rc('axes', linewidth=2)
383
        plot = histogram
384
        bins = test_bins
385
        width = 0.7 * (bins[1] - bins[0])
386
        center = (bins[:-1] + bins[1:])/2
387
        bar[keys[counter]] = plt.bar(center, plot, align='center', width=width, color=colors[counter], label=entries[counter])
388
        plt.axvline(avg, color=colors[counter])
389
        plt.xlabel(xlabel, fontsize=30)
390
        plt.ylabel(ylabel, fontsize=30)
391
        plt.tick_params(axis='both', which='major', labelsize=20)
392
393
        counter = counter + 1
394
        if set_y_limit:
395
            plt.gca().set_ylim([0, y_range])
396
397
        if set_x_limit:
398
            plt.gca().set_xlim([0, x_range])
0 ignored issues
show
Comprehensibility Best Practice introduced by
The variable x_range does not seem to be defined.
Loading history...
399
400
        plt.legend(fontsize=20, frameon=False)
401
    outfile = 'hist_{}.png'.format(prefix)
402
    fig.savefig(outfile, bbox_inches='tight')
403
    if upload==True:
404
        aws.upload_s3(outfile, remote_folder+'/'+outfile, bucket_name=bucket)
405
406
407 View Code Duplication
def plot_particles_in_frame(prefix, x_range=600, y_range=2000, upload=True,
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
408
                            remote_folder = "01_18_Experiment", bucket='ccurtis.data'):
409
    """
410
    Plot number of particles per frame as a function of time.
411
412
    Parameters
413
    ----------
414
    prefix: string
415
        Prefix of file name to be plotted e.g. features_P1.csv prefix is P1.
416
    x_range: float64 or int
417
        Desire x range of graph.
418
    y_range: float64 or int
419
        Desire y range of graph.
420
    upload: boolean
421
        True if you want to upload to s3.
422
423
    """
424
    merged = pd.read_csv('msd_{}.csv'.format(prefix))
425
    frames = int(max(merged['Frame']))
426
    framespace = np.linspace(0, frames, frames)
427
    particles = np.zeros((framespace.shape[0]))
428
    for i in range(0, frames):
0 ignored issues
show
Comprehensibility Best Practice introduced by
The variable range does not seem to be defined.
Loading history...
429
        particles[i] = merged.loc[merged.Frame == i, 'MSDs'].dropna().shape[0]
430
431
    fig = plt.figure(figsize=(5, 5))
432
    plt.plot(framespace, particles, linewidth=4)
433
    plt.xlim(0, x_range)
434
    plt.ylim(0, y_range)
435
    plt.xlabel('Frames', fontsize=20)
436
    plt.ylabel('Particles', fontsize=20)
437
438
    outfile = 'in_frame_{}.png'.format(prefix)
439
    fig.savefig(outfile, bbox_inches='tight')
440
    if upload == True:
441
        aws.upload_s3(outfile, remote_folder+'/'+outfile, bucket_name=bucket)
442
443
444 View Code Duplication
def plot_individual_msds(prefix, x_range=100, y_range=20, umppx=0.16, fps=100.02, alpha=0.1, folder='.', upload=True,
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
445
                         remote_folder="01_18_Experiment", bucket='ccurtis.data', figsize=(10, 10), subset=True, size=1000,
446
                         dpi=300):
447
    """
448
    Plot MSDs of trajectories and the geometric average.
449
450
    Parameters
451
    ----------
452
    prefix: string
453
        Prefix of file name to be plotted e.g. features_P1.csv prefix is P1.
454
    x_range: float64 or int
455
        Desire x range of graph.
456
    y_range: float64 or int
457
        Desire y range of graph.
458
    fps: float64
459
        Frames per second of video.
460
    umppx: float64
461
        Resolution of video in microns per pixel.
462
    alpha: float64
463
        Transparency factor.  Between 0 and 1.
464
    upload: boolean
465
        True if you want to upload to s3.
466
467
    Returns
468
    -------
469
    geo_mean: numpy array
470
        Geometric mean of trajectory MSDs at all time points.
471
    geo_SEM: numpy array
472
        Geometric standard errot of trajectory MSDs at all time points.
473
474
    """
475
476
    merged = pd.read_csv('{}/msd_{}.csv'.format(folder, prefix))
477
478
    fig = plt.figure(figsize=figsize)
479
    particles = int(max(merged['Track_ID']))
480
481
    if particles < size:
482
        size = particles - 1
483
    else:
484
        pass
485
486
    frames = int(max(merged['Frame']))
487
488
    y = merged['Y'].values.reshape((particles+1, frames+1))*umppx*umppx
489
    x = merged['X'].values.reshape((particles+1, frames+1))/fps
490
#     for i in range(0, particles+1):
491
#         y[i, :] = merged.loc[merged.Track_ID == i, 'MSDs']*umppx*umppx
492
#         x = merged.loc[merged.Track_ID == i, 'Frame']/fps
493
494
    particles = np.linspace(0, particles, particles-1).astype(int)
495
    if subset:
496
        particles = np.random.choice(particles, size=size, replace=False)
497
498
    y = np.zeros((particles.shape[0], frames+1))
499
    for idx, val in enumerate(particles):
500
        y[idx, :] = merged.loc[merged.Track_ID == val, 'MSDs']*umppx*umppx
501
        x = merged.loc[merged.Track_ID == val, 'Frame']/fps
502
        plt.plot(x, y[idx, :], 'k', alpha=alpha)
503
504
    geo_mean = np.nanmean(ma.log(y), axis=0)
505
    geo_SEM = stats.sem(ma.log(y), axis=0, nan_policy='omit')
506
    plt.plot(x, np.exp(geo_mean), 'k', linewidth=4)
507
    plt.plot(x, np.exp(geo_mean-geo_SEM), 'k--', linewidth=2)
508
    plt.plot(x, np.exp(geo_mean+geo_SEM), 'k--', linewidth=2)
509
    plt.xlim(0, x_range)
510
    plt.ylim(0, y_range)
511
    plt.xlabel('Tau (s)', fontsize=25)
512
    plt.ylabel(r'Mean Squared Displacement ($\mu$m$^2$)', fontsize=25)
513
514
    outfile = '{}/msds_{}.png'.format(folder, prefix)
515
    outfile2 = '{}/geomean_{}.csv'.format(folder, prefix)
516
    outfile3 = '{}/geoSEM_{}.csv'.format(folder, prefix)
517
    fig.savefig(outfile, bbox_inches='tight', dpi=dpi)
518
    np.savetxt(outfile2, geo_mean, delimiter=",")
519
    np.savetxt(outfile3, geo_SEM, delimiter=",")
520
    if upload:
521
        aws.upload_s3(outfile, remote_folder+'/'+outfile, bucket_name=bucket)
522
        aws.upload_s3(outfile2, remote_folder+'/'+outfile2, bucket_name=bucket)
523
        aws.upload_s3(outfile3, remote_folder+'/'+outfile3, bucket_name=bucket)
524
    return geo_mean, geo_SEM
525