| Total Complexity | 64 |
| Total Lines | 1047 |
| Duplicated Lines | 90.74 % |
| Changes | 0 |
Duplicate code is one of the most pungent code smells. A rule that is often used is to restructure code once it is duplicated in three or more places.
Common duplication problems and their corresponding solutions are:
Complex classes like diff_classifier.msd often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields and methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a subclass, Extract Subclass is also a candidate, and is often faster. The same idea applies within a single module: repeated statements can be pulled into a shared helper function, as the sketch below shows.
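In this module, the clearest instance is the five-way interp1d construction that appears twice in make_xyarray (and in reduced form in msd_calc). A minimal sketch of extracting it into a helper, with interpolate_track as a hypothetical name:

import numpy as np
from scipy import interpolate


def interpolate_track(track, new_frame, columns=('X', 'Y', 'Quality',
                                                 'SN_Ratio',
                                                 'Mean_Intensity')):
    # Interpolate each requested column onto the common frame grid,
    # padding frames outside the observed range with NaNs.
    old_frame = track['Frame'].values.astype(float)
    out = []
    for col in columns:
        fcol = interpolate.interp1d(old_frame, track[col].values,
                                    bounds_error=False, fill_value=np.nan)
        out.append(fcol(new_frame))
    return out

Both call sites in make_xyarray would then reduce to a single call each, shrinking the duplicated-line count above. The full module as analyzed follows.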
1 | """Functions to calculate mean squared displacements from trajectory data |
||
2 | |||
3 | This module includes functions to calculate mean squared displacements and |
||
4 | additional measures from input trajectory datasets as calculated by the |
||
5 | Trackmate ImageJ plugin. |
||
6 | |||
7 | """ |
||
8 | import warnings |
||
9 | import random as rand |
||
10 | |||
11 | import pandas as pd |
||
12 | import numpy as np |
||
13 | import numpy.ma as ma |
||
14 | import scipy.stats as stats |
||
15 | from scipy import interpolate |
||
16 | import matplotlib.pyplot as plt |
||
17 | from matplotlib.pyplot import cm |
||
18 | import diff_classifier.aws as aws |
||
19 | from scipy.ndimage.morphology import distance_transform_edt as eudist |
||
20 | |||
21 | |||
def nth_diff(dataframe, n=1, axis=0):
    """Calculates the nth difference between vector elements

    Returns a new vector of size N - n containing the nth difference between
    vector elements.

    Parameters
    ----------
    dataframe : pandas.core.series.Series of int or float
        Input data on which differences are to be calculated.
    n : int
        Function calculates xpos(i) - xpos(i - n) for all values in pandas
        series.
    axis : {0, 1}
        Axis along which differences are to be calculated for 2D numpy array
        input. Default is 0. Ignored for 1D pandas series input.

    Returns
    -------
    diff : pandas.core.series.Series of int or float
        Pandas series of size N - n, where N is the original size of
        dataframe.

    Examples
    --------
    >>> df = np.ones((5, 10))
    >>> nth_diff(df, axis=1)
    array([[0., 0., 0., 0., 0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0., 0., 0., 0., 0.]])

    >>> df = np.ones((5, 10))
    >>> nth_diff(df)
    array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])

    """

    assert isinstance(n, int), "n must be an integer."

    if dataframe.ndim == 1:
        length = dataframe.shape[0]
        if n <= length:
            test1 = dataframe[:-n].reset_index(drop=True)
            test2 = dataframe[n:].reset_index(drop=True)
            diff = test2 - test1
        else:
            diff = np.array([np.nan, np.nan])
    else:
        length = dataframe.shape[0]
        if n <= length:
            if axis == 0:
                test1 = dataframe[:-n, :]
                test2 = dataframe[n:, :]
            else:
                test1 = dataframe[:, :-n]
                test2 = dataframe[:, n:]
            diff = test2 - test1
        else:
            diff = np.array([np.nan, np.nan])

    return diff


def msd_calc(track, length=10):
    """Calculates mean squared displacement of input track.

    Returns a pandas dataframe containing MSD data calculated from an
    individual track.

    Parameters
    ----------
    track : pandas.core.frame.DataFrame
        Contains, at a minimum, a 'Frame', 'X', and 'Y' column.
    length : int
        Number of frames in the output track. Missing frames are filled in
        with NaNs.

    Returns
    -------
    new_track : pandas.core.frame.DataFrame
        Similar to input track. All missing frames of individual trajectories
        are filled in with NaNs, and two new columns, MSDs and Gauss, are
        added:
        MSDs, calculated mean squared displacements using the formula
        MSD = <(xpos-x0)**2>
        Gauss, calculated Gaussianity

    Examples
    --------
    >>> data1 = {'Frame': [1, 2, 3, 4, 5],
    ...          'X': [5, 6, 7, 8, 9],
    ...          'Y': [6, 7, 8, 9, 10]}
    >>> df = pd.DataFrame(data=data1)
    >>> new_track = msd.msd_calc(df, 5)

    >>> data1 = {'Frame': [1, 2, 3, 4, 5],
    ...          'X': [5, 6, 7, 8, 9],
    ...          'Y': [6, 7, 8, 9, 10]}
    >>> df = pd.DataFrame(data=data1)
    >>> new_track = msd.msd_calc(df)

    """

    meansd = np.zeros(length)
    gauss = np.zeros(length)
    new_frame = np.linspace(1, length, length)
    old_frame = track['Frame']
    oldxy = [track['X'], track['Y']]
    fxy = [interpolate.interp1d(old_frame, oldxy[0], bounds_error=False,
                                fill_value=np.nan),
           interpolate.interp1d(old_frame, oldxy[1], bounds_error=False,
                                fill_value=np.nan)]

    # masked_invalid masks the NaN fill values; masked_equal(x, np.nan)
    # would mask nothing, since NaN never compares equal to itself.
    intxy = [ma.masked_invalid(fxy[0](new_frame)),
             ma.masked_invalid(fxy[1](new_frame))]
    data1 = {'Frame': new_frame,
             'X': intxy[0],
             'Y': intxy[1]
             }
    new_track = pd.DataFrame(data=data1)

    for frame in range(0, length-1):
        xy = [np.square(nth_diff(new_track['X'], n=frame+1)),
              np.square(nth_diff(new_track['Y'], n=frame+1))]
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=RuntimeWarning)
            meansd[frame+1] = np.nanmean(xy[0] + xy[1])
            gauss[frame+1] = np.nanmean(xy[0]**2 + xy[1]**2
                                        )/(2*(meansd[frame+1]**2))

    new_track['MSDs'] = pd.Series(meansd, index=new_track.index)
    new_track['Gauss'] = pd.Series(gauss, index=new_track.index)

    return new_track


def all_msds(data):
    """Calculates mean squared displacements of a trajectory dataset

    Returns a pandas dataframe containing MSD data of all tracks in a
    trajectory dataset.

    Parameters
    ----------
    data : pandas.core.frame.DataFrame
        Contains, at a minimum, a 'Frame', 'Track_ID', 'X', and
        'Y' column. Note: this function assumes that frame numbering begins
        at 1, not 0. Adjust before feeding into function.

    Returns
    -------
    new_data : pandas.core.frame.DataFrame
        Similar to input data. All missing frames of individual trajectories
        are filled in with NaNs, and two new columns, MSDs and Gauss, are
        added:
        MSDs, calculated mean squared displacements using the formula
        MSD = <(xpos-x0)**2>
        Gauss, calculated Gaussianity

    Examples
    --------
    >>> data1 = {'Frame': [1, 2, 3, 4, 5, 1, 2, 3, 4, 5],
    ...          'Track_ID': [1, 1, 1, 1, 1, 2, 2, 2, 2, 2],
    ...          'X': [5, 6, 7, 8, 9, 1, 2, 3, 4, 5],
    ...          'Y': [6, 7, 8, 9, 10, 2, 3, 4, 5, 6]}
    >>> df = pd.DataFrame(data=data1)
    >>> all_msds(df)

    """

    trackids = data.Track_ID.unique()
    partcount = trackids.shape[0]
    length = int(max(data['Frame']))
    new = {}
    new['length'] = partcount*length
    new['frame'] = np.zeros(new['length'])
    new['ID'] = np.zeros(new['length'])
    new['xy'] = [np.zeros(new['length']),
                 np.zeros(new['length'])]
    meansd = np.zeros(new['length'])
    gauss = np.zeros(new['length'])

    for particle in range(0, partcount):
        single_track = data.loc[data['Track_ID'] ==
                                trackids[particle]
                                ].sort_values(['Track_ID', 'Frame'],
                                              ascending=[1, 1]
                                              ).reset_index(drop=True)
        if particle == 0:
            index1 = 0
            index2 = length
        else:
            index1 = index2
            index2 = index2 + length
        new['single_track'] = msd_calc(single_track, length=length)
        new['frame'][index1:index2] = np.linspace(1, length, length)
        new['ID'][index1:index2] = particle+1
        new['xy'][0][index1:index2] = new['single_track']['X']
        new['xy'][1][index1:index2] = new['single_track']['Y']
        meansd[index1:index2] = new['single_track']['MSDs']
        gauss[index1:index2] = new['single_track']['Gauss']

    data1 = {'Frame': new['frame'],
             'Track_ID': new['ID'],
             'X': new['xy'][0],
             'Y': new['xy'][1],
             'MSDs': meansd,
             'Gauss': gauss}
    new_data = pd.DataFrame(data=data1)

    return new_data


def make_xyarray(data, length=651):
    """Rearranges xy position data into 2d arrays

    Rearranges xy data from input pandas dataframe into 2D numpy array.

    Parameters
    ----------
    data : pd.core.frame.DataFrame
        Contains, at a minimum, a 'Frame', 'Track_ID', 'X', 'Y', 'Quality',
        'SN_Ratio', and 'Mean_Intensity' column.
    length : int
        Desired length or number of frames to which to extend trajectories.
        Any trajectories shorter than the input length will have the extra
        space filled in with NaNs.

    Returns
    -------
    xyft : dict of np.ndarray
        Dictionary containing position, frame, and trajectory ID data.
        Contains the following keys (each length x particles):
        farray, frames data
        tarray, trajectory ID data
        xarray, x position data
        yarray, y position data
        qarray, quality data
        snarray, signal-to-noise data
        iarray, mean intensity data

    Examples
    --------
    >>> data1 = {'Frame': [0, 1, 2, 3, 4, 2, 3, 4, 5, 6],
    ...          'Track_ID': [1, 1, 1, 1, 1, 2, 2, 2, 2, 2],
    ...          'X': [5, 6, 7, 8, 9, 1, 2, 3, 4, 5],
    ...          'Y': [6, 7, 8, 9, 10, 2, 3, 4, 5, 6]}
    >>> df = pd.DataFrame(data=data1)
    >>> length = max(df['Frame']) + 1
    >>> xyft = msd.make_xyarray(df, length=length)
    {'farray': array([[0., 0.],
                      [1., 1.],
                      [2., 2.],
                      [3., 3.],
                      [4., 4.],
                      [5., 5.],
                      [6., 6.]]),
     'tarray': array([[1., 2.],
                      [1., 2.],
                      [1., 2.],
                      [1., 2.],
                      [1., 2.],
                      [1., 2.],
                      [1., 2.]]),
     'xarray': array([[ 5., nan],
                      [ 6., nan],
                      [ 7.,  1.],
                      [ 8.,  2.],
                      [ 9.,  3.],
                      [nan,  4.],
                      [nan,  5.]]),
     'yarray': array([[ 6., nan],
                      [ 7., nan],
                      [ 8.,  2.],
                      [ 9.,  3.],
                      [10.,  4.],
                      [nan,  5.],
                      [nan,  6.]])}

    """

    # Initial values
    first_p = int(min(data['Track_ID']))
    particles = int(max(data['Track_ID'])) - first_p + 1
    xyft = {}
    xyft['xarray'] = np.zeros((length, particles))
    xyft['yarray'] = np.zeros((length, particles))
    xyft['farray'] = np.zeros((length, particles))
    xyft['tarray'] = np.zeros((length, particles))
    xyft['qarray'] = np.zeros((length, particles))
    xyft['snarray'] = np.zeros((length, particles))
    xyft['iarray'] = np.zeros((length, particles))

    track = data[data['Track_ID'] == first_p
                 ].sort_values(['Track_ID', 'Frame'],
                               ascending=[1, 1]).reset_index(drop=True)
    new_frame = np.linspace(0, length-1, length)

    old_frame = track['Frame'].values.astype(float)
    oldxy = [track['X'].values,
             track['Y'].values,
             track['Quality'].values,
             track['SN_Ratio'].values,
             track['Mean_Intensity'].values]
    fxy = [interpolate.interp1d(old_frame, oldxy[0], bounds_error=False,
                                fill_value=np.nan),
           interpolate.interp1d(old_frame, oldxy[1], bounds_error=False,
                                fill_value=np.nan),
           interpolate.interp1d(old_frame, oldxy[2], bounds_error=False,
                                fill_value=np.nan),
           interpolate.interp1d(old_frame, oldxy[3], bounds_error=False,
                                fill_value=np.nan),
           interpolate.interp1d(old_frame, oldxy[4], bounds_error=False,
                                fill_value=np.nan)]

    intxy = [fxy[0](new_frame), fxy[1](new_frame), fxy[2](new_frame),
             fxy[3](new_frame), fxy[4](new_frame)]

    # Fill in entire array
    xyft['xarray'][:, 0] = intxy[0]
    xyft['yarray'][:, 0] = intxy[1]
    xyft['farray'][:, 0] = new_frame
    xyft['tarray'][:, 0] = first_p
    xyft['qarray'][:, 0] = intxy[2]
    xyft['snarray'][:, 0] = intxy[3]
    xyft['iarray'][:, 0] = intxy[4]

    for part in range(first_p+1, first_p+particles):
        track = data[data['Track_ID'] == part
                     ].sort_values(['Track_ID', 'Frame'],
                                   ascending=[1, 1]).reset_index(drop=True)

        old_frame = track['Frame']
        oldxy = [track['X'].values,
                 track['Y'].values,
                 track['Quality'].values,
                 track['SN_Ratio'].values,
                 track['Mean_Intensity'].values]

        fxy = [interpolate.interp1d(old_frame, oldxy[0], bounds_error=False,
                                    fill_value=np.nan),
               interpolate.interp1d(old_frame, oldxy[1], bounds_error=False,
                                    fill_value=np.nan),
               interpolate.interp1d(old_frame, oldxy[2], bounds_error=False,
                                    fill_value=np.nan),
               interpolate.interp1d(old_frame, oldxy[3], bounds_error=False,
                                    fill_value=np.nan),
               interpolate.interp1d(old_frame, oldxy[4], bounds_error=False,
                                    fill_value=np.nan)]

        intxy = [fxy[0](new_frame), fxy[1](new_frame), fxy[2](new_frame),
                 fxy[3](new_frame), fxy[4](new_frame)]

        xyft['xarray'][:, part-first_p] = intxy[0]
        xyft['yarray'][:, part-first_p] = intxy[1]
        xyft['farray'][:, part-first_p] = new_frame
        xyft['tarray'][:, part-first_p] = part
        xyft['qarray'][:, part-first_p] = intxy[2]
        xyft['snarray'][:, part-first_p] = intxy[3]
        xyft['iarray'][:, part-first_p] = intxy[4]

    return xyft


def all_msds2(data, frames=651):
    """Calculates mean squared displacements of input trajectory dataset

    Returns a pandas dataframe containing MSD data of all tracks in a
    trajectory dataset.

    Parameters
    ----------
    data : pandas.core.frame.DataFrame
        Contains, at a minimum, a 'Frame', 'Track_ID', 'X', and
        'Y' column. Note: this function assumes that frame numbering begins
        at 0.

    Returns
    -------
    new_data : pandas.core.frame.DataFrame
        Similar to input data. All missing frames of individual trajectories
        are filled in with NaNs, and two new columns, MSDs and Gauss, are
        added:
        MSDs, calculated mean squared displacements using the formula
        MSD = <(xpos-x0)**2>
        Gauss, calculated Gaussianity

    Examples
    --------
    >>> data1 = {'Frame': [0, 1, 2, 3, 4, 0, 1, 2, 3, 4],
    ...          'Track_ID': [1, 1, 1, 1, 1, 2, 2, 2, 2, 2],
    ...          'X': [5, 6, 7, 8, 9, 1, 2, 3, 4, 5],
    ...          'Y': [6, 7, 8, 9, 10, 2, 3, 4, 5, 6]}
    >>> df = pd.DataFrame(data=data1)
    >>> cols = ['Frame', 'Track_ID', 'X', 'Y', 'MSDs', 'Gauss']
    >>> length = max(df['Frame']) + 1
    >>> msd.all_msds2(df, frames=length)[cols]

    """
    if data.shape[0] > 2:
        try:
            xyft = make_xyarray(data, length=frames)
            length = xyft['xarray'].shape[0]
            particles = xyft['xarray'].shape[1]

            meansd = np.zeros((length, particles))
            gauss = np.zeros((length, particles))

            for frame in range(0, length-1):
                xpos = np.square(nth_diff(xyft['xarray'], n=frame+1))
                ypos = np.square(nth_diff(xyft['yarray'], n=frame+1))

                with warnings.catch_warnings():
                    warnings.simplefilter("ignore", category=RuntimeWarning)
                    meansd[frame+1, :] = np.nanmean(xpos + ypos, axis=0)
                    gauss[frame+1, :] = np.nanmean(xpos**2 + ypos**2, axis=0
                                                   )/(2*(meansd[frame+1]**2))

            data1 = {'Frame': xyft['farray'].flatten('F'),
                     'Track_ID': xyft['tarray'].flatten('F'),
                     'X': xyft['xarray'].flatten('F'),
                     'Y': xyft['yarray'].flatten('F'),
                     'MSDs': meansd.flatten('F'),
                     'Gauss': gauss.flatten('F'),
                     'Quality': xyft['qarray'].flatten('F'),
                     'SN_Ratio': xyft['snarray'].flatten('F'),
                     'Mean_Intensity': xyft['iarray'].flatten('F')}

            new_data = pd.DataFrame(data=data1)
        except (ValueError, IndexError):
            # Fall back to an empty dataframe with the expected columns.
            data1 = {'Frame': [],
                     'Track_ID': [],
                     'X': [],
                     'Y': [],
                     'MSDs': [],
                     'Gauss': [],
                     'Quality': [],
                     'SN_Ratio': [],
                     'Mean_Intensity': []}
            new_data = pd.DataFrame(data=data1)
    else:
        data1 = {'Frame': [],
                 'Track_ID': [],
                 'X': [],
                 'Y': [],
                 'MSDs': [],
                 'Gauss': [],
                 'Quality': [],
                 'SN_Ratio': [],
                 'Mean_Intensity': []}
        new_data = pd.DataFrame(data=data1)

    return new_data


def geomean_msdisp(prefix, umppx=0.16, fps=100.02, upload=True,
                   remote_folder="01_18_Experiment", bucket='ccurtis.data',
                   backup_frames=651):
    """Computes geometric averages of mean squared displacement datasets

    Calculates geometric averages and standard errors for MSD datasets. Might
    error out if the data are not formatted as output from all_msds2.

    Parameters
    ----------
    prefix : string
        Prefix of file name to be plotted, e.g. for features_P1.csv the
        prefix is P1.
    umppx : float
        Microns per pixel of original images.
    fps : float
        Frames per second of video.
    upload : bool
        True if you want to upload to S3.
    remote_folder : string
        Folder in S3 bucket to upload to.
    bucket : string
        Name of S3 bucket to upload to.
    backup_frames : int
        Number of frames used for the NaN-filled outputs when the input
        dataset is empty.

    Returns
    -------
    geo_mean : numpy.ndarray
        Geometric mean of trajectory MSDs at all time points.
    geo_stder : numpy.ndarray
        Geometric standard error of trajectory MSDs at all time points.

    """

    merged = pd.read_csv('msd_{}.csv'.format(prefix))
    try:
        particles = int(max(merged['Track_ID']))
        frames = int(max(merged['Frame']))
        ypos = np.zeros((particles+1, frames+1))

        for i in range(0, particles+1):
            ypos[i, :] = merged.loc[merged.Track_ID == i, 'MSDs']*umppx*umppx
            xpos = merged.loc[merged.Track_ID == i, 'Frame']/fps

        geo_mean = np.nanmean(ma.log(ypos), axis=0)
        geo_stder = ma.masked_equal(stats.sem(ma.log(ypos), axis=0,
                                              nan_policy='omit'), 0.0)

    except ValueError:
        geo_mean = np.nan*np.ones(backup_frames)
        geo_stder = np.nan*np.ones(backup_frames)

    np.savetxt('geomean_{}.csv'.format(prefix), geo_mean, delimiter=",")
    np.savetxt('geoSEM_{}.csv'.format(prefix), geo_stder, delimiter=",")

    if upload:
        aws.upload_s3('geomean_{}.csv'.format(prefix),
                      remote_folder+'/'+'geomean_{}.csv'.format(prefix),
                      bucket_name=bucket)
        aws.upload_s3('geoSEM_{}.csv'.format(prefix),
                      remote_folder+'/'+'geoSEM_{}.csv'.format(prefix),
                      bucket_name=bucket)

    return geo_mean, geo_stder


def binning(experiments, wells=4, prefix='test'):
    """Split set of input experiments into groups.

    Parameters
    ----------
    experiments : list of str
        List of experiment names.
    wells : int
        Number of groups to divide experiments into.
    prefix : str
        Prefix for the names of the resulting groups.

    Returns
    -------
    slices : int
        Number of experiments per group.
    bins : dict of list of str
        Dictionary, keys corresponding to group names, and elements containing
        lists of experiments in each group.
    bin_names : list of str
        List of group names.

    """

    total_videos = len(experiments)
    bins = {}
    slices = int(total_videos/wells)
    bin_names = []

    for num in range(0, wells):
        slice1 = num*slices
        slice2 = (num+1)*(slices)
        pref = '{}_W{}'.format(prefix, num)
        bins[pref] = experiments[slice1:slice2]
        bin_names.append(pref)
    return slices, bins, bin_names


def precision_weight(group, geo_stder):
    """Calculates precision-based weights from input standard error data

    Calculates precision weights to be used in precision-averaged MSD
    calculations.

    Parameters
    ----------
    group : list of str
        List of experiment names to average. Each element corresponds to a key
        in geo_stder and geomean.
    geo_stder : dict of numpy.ndarray
        Each entry in dictionary corresponds to the standard errors of an MSD
        profile, the key corresponding to an experiment name.

    Returns
    -------
    weights : numpy.ndarray
        Precision weights to be used in precision averaging.
    w_holder : numpy.ndarray
        Precision values of each video at each time point.

    """

    frames = np.shape(geo_stder[group[0]])[0]
    slices = len(group)
    video_counter = 0
    w_holder = np.zeros((slices, frames))
    for sample in group:
        w_holder[video_counter, :] = 1/(geo_stder[sample]*geo_stder[sample])
        video_counter = video_counter + 1

    w_holder = ma.masked_equal(w_holder, 0.0)
    w_holder = ma.masked_equal(w_holder, 1.0)

    weights = ma.sum(w_holder, axis=0)

    return weights, w_holder


def precision_averaging(group, geomean, geo_stder, weights, save=True,
                        bucket='ccurtis.data', folder='test',
                        experiment='test'):
    """Calculates precision-weighted averages of MSD datasets.

    Parameters
    ----------
    group : list of str
        List of experiment names to average. Each element corresponds to a key
        in geo_stder and geomean.
    geomean : dict of numpy.ndarray
        Each entry in dictionary corresponds to an MSD profile, the key
        corresponding to an experiment name.
    geo_stder : dict of numpy.ndarray
        Each entry in dictionary corresponds to the standard errors of an MSD
        profile, the key corresponding to an experiment name.
    weights : numpy.ndarray
        Precision weights to be used in precision averaging.
    save : bool
        True if the resulting averages should be saved and uploaded to S3.
    bucket : str
        Name of S3 bucket to upload to.
    folder : str
        Folder in S3 bucket to upload to.
    experiment : str
        Name of experiment used in output filenames.

    Returns
    -------
    geodata : Bunch
        Bunch object containing the precision-weighted averaged MSDs
        (geomean) and SEMs (geostd) from the experiments specified in group,
        plus the intermediate weight (weighthold) and standard error
        (geostdhold) arrays.

    """

    frames = np.shape(geo_stder[group[0]])[0]
    slices = len(group)

    video_counter = 0
    geo_holder = np.zeros((slices, frames))
    gstder_holder = np.zeros((slices, frames))
    w_holder = np.zeros((slices, frames))
    for sample in group:
        w_holder[video_counter, :] = (1/(geo_stder[sample]*geo_stder[sample])
                                      )/weights
        geo_holder[video_counter, :] = w_holder[video_counter, :
                                                ] * geomean[sample]
        gstder_holder[video_counter, :] = 1/(geo_stder[sample] *
                                             geo_stder[sample])
        video_counter = video_counter + 1

    w_holder = ma.masked_equal(w_holder, 0.0)
    w_holder = ma.masked_equal(w_holder, 1.0)
    geo_holder = ma.masked_equal(geo_holder, 0.0)
    geo_holder = ma.masked_equal(geo_holder, 1.0)
    gstder_holder = ma.masked_equal(gstder_holder, 0.0)
    gstder_holder = ma.masked_equal(gstder_holder, 1.0)

    geo = ma.sum(geo_holder, axis=0)
    geo_stder = ma.sqrt((1/ma.sum(gstder_holder, axis=0)))

    if save:
        geo_f = 'geomean_{}.csv'.format(experiment)
        gstder_f = 'geoSEM_{}.csv'.format(experiment)
        np.savetxt(geo_f, geo, delimiter=',')
        np.savetxt(gstder_f, geo_stder, delimiter=',')
        aws.upload_s3(geo_f, '{}/{}'.format(folder, geo_f), bucket_name=bucket)
        aws.upload_s3(gstder_f, '{}/{}'.format(folder, gstder_f),
                      bucket_name=bucket)

    geodata = Bunch(geomean=geo, geostd=geo_stder, weighthold=w_holder,
                    geostdhold=gstder_holder)

    return geodata


def plot_all_experiments(experiments, bucket='ccurtis.data', folder='test',
                         yrange=(10**-1, 10**1), fps=100.02,
                         xrange=(10**-2, 10**0), upload=True,
                         outfile='test.png', exponential=True,
                         labels=None, log=True):
    """Plots precision-weighted averages of MSD datasets.

    Plots pre-calculated precision-weighted averages of MSD datasets
    calculated from precision_averaging and stored in an AWS S3 bucket.

    Parameters
    ----------
    experiments : list of str
        List of experiment names to plot. Each experiment must have an MSD and
        SEM file associated with it in S3.
    bucket : str
        S3 bucket from which to download data.
    folder : str
        Folder in S3 bucket from which to download data.
    yrange : list of float
        Y range of plot.
    fps : float
        Frames per second of video.
    xrange : list of float
        X range of plot.
    upload : bool
        True to upload to S3.
    outfile : str
        Filename of output image.
    exponential : bool
        True to exponentiate the stored (log-scale) values before plotting.
    labels : list of str
        Legend labels. Defaults to the experiment names.
    log : bool
        True to plot on log-log axes.

    """

    n = len(experiments)

    if labels is None:
        labels = experiments

    color = iter(cm.viridis(np.linspace(0, 0.9, n)))

    fig = plt.figure(figsize=(8.5, 8.5))
    ax = fig.add_subplot(111)
    plt.xlim(xrange[0], xrange[1])
    plt.ylim(yrange[0], yrange[1])
    plt.xlabel('Tau (s)', fontsize=25)
    plt.ylabel(r'Mean Squared Displacement ($\mu$m$^2$)', fontsize=25)

    geo = {}
    gstder = {}
    counter = 0
    for experiment in experiments:
        aws.download_s3('{}/geomean_{}.csv'.format(folder, experiment),
                        'geomean_{}.csv'.format(experiment),
                        bucket_name=bucket)
        aws.download_s3('{}/geoSEM_{}.csv'.format(folder, experiment),
                        'geoSEM_{}.csv'.format(experiment),
                        bucket_name=bucket)

        geo[counter] = np.genfromtxt('geomean_{}.csv'.format(experiment))
        gstder[counter] = np.genfromtxt('geoSEM_{}.csv'.format(experiment))
        geo[counter] = ma.masked_equal(geo[counter], 0.0)
        gstder[counter] = ma.masked_equal(gstder[counter], 0.0)

        frames = np.shape(gstder[counter])[0]
        xpos = np.linspace(0, frames-1, frames)/fps
        c = next(color)

        if exponential:
            ax.plot(xpos, np.exp(geo[counter]), c=c, linewidth=6,
                    label=labels[counter])
            ax.fill_between(xpos, np.exp(geo[counter] - 1.96*gstder[counter]),
                            np.exp(geo[counter] + 1.96*gstder[counter]),
                            color=c, alpha=0.4)

        else:
            ax.plot(xpos, geo[counter], c=c, linewidth=6,
                    label=labels[counter])
            ax.fill_between(xpos, geo[counter] - 1.96*gstder[counter],
                            geo[counter] + 1.96*gstder[counter], color=c,
                            alpha=0.4)

        counter = counter + 1

    if log:
        ax.set_xscale("log")
        ax.set_yscale("log")

    plt.legend(frameon=False, loc=2, prop={'size': 16})

    if upload:
        fig.savefig(outfile, bbox_inches='tight')
        aws.upload_s3(outfile, folder+'/'+outfile, bucket_name=bucket)


def checkerboard_mask(dims=(512, 512), squares=50, width=25):
    """Creates a 2D Boolean checkerboard mask

    Creates a Boolean array of evenly spaced squares.
    Whitespace is set to True.

    Parameters
    ----------
    dims : tuple of int
        Dimensions of desired Boolean array.
    squares : int
        Dimension of an individual square in array.
    width : int
        Dimension of spacing between squares.

    Returns
    -------
    zeros : numpy.ndarray of bool
        2D Boolean array of evenly spaced squares.

    """
    zeros = np.zeros(dims) == 0
    square_d = squares

    loy = width
    hiy = loy + square_d

    # Stamp squares row by row, clipping at the array bounds given by dims.
    for j in range(50):

        lox = width
        hix = lox + square_d
        for i in range(50):

            if hix < dims[1] and hiy < dims[0]:
                zeros[loy:hiy, lox:hix] = False
            elif hix < dims[1]:
                zeros[loy:dims[0]-1, lox:hix] = False
            elif hiy < dims[0]:
                zeros[loy:hiy, lox:dims[1]-1] = False
            else:
                zeros[loy:dims[0]-1, lox:dims[1]-1] = False
                break

            lox = hix + width
            hix = lox + square_d

        loy = hiy + width
        hiy = loy + square_d

    return zeros


def random_walk(nsteps=100, seed=None, start=(0, 0), step=1, mask=None,
                stuckprob=0.5):
    """Creates 2d random walk trajectory.

    Parameters
    ----------
    nsteps : int
        Number of steps for trajectory to move.
    seed : int
        Seed for pseudo-random number generator for reproducibility.
    start : tuple of int or float
        Starting xy coordinates at which the random walk begins.
    step : int or float
        Magnitude of single step.
    mask : numpy.ndarray of bool
        Mask of barriers constraining diffusion.
    stuckprob : float
        Probability of "particle" adhering to barrier when it makes contact.

    Returns
    -------
    x : numpy.ndarray
        Array of x coordinates of random walk.
    y : numpy.ndarray
        Array of y coordinates of random walk.

    """

    if type(mask) is np.ndarray:
        # Shift the starting point until it lies inside the mask, and
        # precompute the Euclidean distance to the nearest True region.
        while not mask[start[0], start[1]]:
            start = (start[0], start[1]+1)
        eumask = eudist(~mask)

    # Seed both generators used below: `random` drives the step directions
    # and numpy drives the sticking probability.
    rand.seed(a=seed)
    np.random.seed(seed=seed)

    x = np.zeros(nsteps)
    y = np.zeros(nsteps)
    x[0] = start[0]
    y[0] = start[1]

    # Checks to see if a mask is being used first
    if not type(mask) is np.ndarray:
        for i in range(1, nsteps):
            val = rand.randint(1, 4)
            if val == 1:
                x[i] = x[i - 1] + step
                y[i] = y[i - 1]
            elif val == 2:
                x[i] = x[i - 1] - step
                y[i] = y[i - 1]
            elif val == 3:
                x[i] = x[i - 1]
                y[i] = y[i - 1] + step
            else:
                x[i] = x[i - 1]
                y[i] = y[i - 1] - step
    else:
        for i in range(1, nsteps):
            val = rand.randint(1, 4)
            # If mask is being used, checks if entry is in mask or not
            if mask[int(x[i-1]), int(y[i-1])]:
                if val == 1:
                    x[i] = x[i - 1] + step
                    y[i] = y[i - 1]
                elif val == 2:
                    x[i] = x[i - 1] - step
                    y[i] = y[i - 1]
                elif val == 3:
                    x[i] = x[i - 1]
                    y[i] = y[i - 1] + step
                else:
                    x[i] = x[i - 1]
                    y[i] = y[i - 1] - step
            # If it does cross into a False area, probability to be stuck
            elif np.random.rand() > stuckprob:
                x[i] = x[i - 1]
                y[i] = y[i - 1]

                # Walk back toward the mask along the distance gradient
                while eumask[int(x[i]), int(y[i])] > 0:
                    vals = np.zeros(4)
                    vals[0] = eumask[int(x[i] + step), int(y[i])]
                    vals[1] = eumask[int(x[i] - step), int(y[i])]
                    vals[2] = eumask[int(x[i]), int(y[i] + step)]
                    vals[3] = eumask[int(x[i]), int(y[i] - step)]
                    vali = np.argmin(vals)

                    if vali == 0:
                        x[i] = x[i] + step
                        y[i] = y[i]
                    elif vali == 1:
                        x[i] = x[i] - step
                        y[i] = y[i]
                    elif vali == 2:
                        x[i] = x[i]
                        y[i] = y[i] + step
                    else:
                        x[i] = x[i]
                        y[i] = y[i] - step
            # Otherwise, particle is stuck on "cell"
            else:
                x[i] = x[i - 1]
                y[i] = y[i - 1]

    return x, y


# def random_walk(nsteps=100, seed=1, start=(0, 0)):
#     """Creates 2d random walk trajectory.
#
#     Parameters
#     ----------
#     nsteps : int
#         Number of steps for trajectory to move.
#     seed : int
#         Seed for pseudo-random number generator for reproducibility.
#     start : tuple of int or float
#         Starting xy coordinates at which the random walk begins.
#
#     Returns
#     -------
#     x : numpy.ndarray
#         Array of x coordinates of random walk.
#     y : numpy.ndarray
#         Array of y coordinates of random walk.
#
#     """
#
#     rand.seed(a=seed)
#
#     x = np.zeros(nsteps)
#     y = np.zeros(nsteps)
#     x[0] = start[0]
#     y[0] = start[1]
#
#     for i in range(1, nsteps):
#         val = rand.randint(1, 4)
#         if val == 1:
#             x[i] = x[i - 1] + 1
#             y[i] = y[i - 1]
#         elif val == 2:
#             x[i] = x[i - 1] - 1
#             y[i] = y[i - 1]
#         elif val == 3:
#             x[i] = x[i - 1]
#             y[i] = y[i - 1] + 1
#         else:
#             x[i] = x[i - 1]
#             y[i] = y[i - 1] - 1
#
#     return x, y


def random_traj_dataset(nframes=100, nparts=30, seed=1, fsize=(0, 512),
                        ndist=(1, 2)):
    """Creates a random population of random walks.

    Parameters
    ----------
    nframes : int
        Number of frames for each random trajectory.
    nparts : int
        Number of particles in trajectory dataset.
    seed : int
        Seed for pseudo-random number generator for reproducibility.
    fsize : tuple of int or float
        Range of coordinates over which particles may start.
    ndist : tuple of int or float
        Parameters to generate normal distribution, mu and sigma.

    Returns
    -------
    dataf : pandas.core.frame.DataFrame
        Trajectory data containing a 'Frame', 'Track_ID', 'X', and
        'Y' column.

    """

    frames = []
    trackid = []
    x = []
    y = []
    start = [0, 0]
    pseed = seed

    for i in range(nparts):
        rand.seed(a=i+pseed)
        start[0] = rand.randint(fsize[0], fsize[1])
        rand.seed(a=i+3+pseed)
        start[1] = rand.randint(fsize[0], fsize[1])
        rand.seed(a=i+5+pseed)
        weight = rand.normalvariate(mu=ndist[0], sigma=ndist[1])

        trackid = np.append(trackid, np.array([i]*nframes))
        xi, yi = random_walk(nsteps=nframes, seed=i)
        x = np.append(x, weight*xi+start[0])
        y = np.append(y, weight*yi+start[1])
        frames = np.append(frames, np.linspace(0, nframes-1, nframes))

    datai = {'Frame': frames,
             'Track_ID': trackid,
             'X': x,
             'Y': y,
             'Quality': nframes*nparts*[10],
             'SN_Ratio': nframes*nparts*[0.1],
             'Mean_Intensity': nframes*nparts*[120]}
    dataf = pd.DataFrame(data=datai)

    return dataf


class Bunch:
    """Simple container that exposes keyword arguments as attributes."""

    def __init__(self, **kwds):
        self.__dict__.update(kwds)
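
For reference, a minimal end-to-end sketch of how these functions compose, assuming purely local execution (upload=False keeps the results off S3; the 'demo' prefix and filenames are illustrative):

import diff_classifier.msd as msd

# Simulate a small trajectory dataset and compute per-track MSDs.
dataset = msd.random_traj_dataset(nframes=100, nparts=10, seed=1)
msds = msd.all_msds2(dataset, frames=100)

# geomean_msdisp reads its input from disk, so write the MSDs out first.
msds.to_csv('msd_demo.csv')
geo_mean, geo_stder = msd.geomean_msdisp('demo', upload=False)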