Total Complexity | 42 |
Total Lines | 877 |
Duplicated Lines | 94.53 % |
Changes | 0 |
Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems, and corresponding solutions are:
Complex classes like diff_classifier.msd often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
1 | """Functions to calculate mean squared displacements from trajectory data |
||
2 | |||
3 | This module includes functions to calculate mean squared displacements and |
||
4 | additional measures from input trajectory datasets as calculated by the |
||
5 | Trackmate ImageJ plugin. |
||
6 | |||
7 | """ |
||
8 | import warnings |
||
9 | import random as rand |
||
10 | |||
11 | import pandas as pd |
||
12 | import numpy as np |
||
13 | import numpy.ma as ma |
||
14 | import scipy.stats as stats |
||
15 | from scipy import interpolate |
||
16 | import matplotlib.pyplot as plt |
||
17 | from matplotlib.pyplot import cm |
||
18 | import diff_classifier.aws as aws |
||
19 | |||
20 | |||
21 | View Code Duplication | def nth_diff(dataframe, n=1, axis=0): |
|
|
|||
22 | """Calculates the nth difference between vector elements |
||
23 | |||
24 | Returns a new vector of size N - n containing the nth difference between |
||
25 | vector elements. |
||
26 | |||
27 | Parameters |
||
28 | ---------- |
||
29 | dataframe : pandas.core.series.Series of int or float |
||
30 | Input data on which differences are to be calculated. |
||
31 | n : int |
||
32 | Function calculated xpos(i) - xpos(i - n) for all values in pandas |
||
33 | series. |
||
34 | axis : {0, 1} |
||
35 | Axis along which differences are to be calculated. Default is 0. If 0, |
||
36 | input must be a pandas series. If 1, input must be a numpy array. |
||
37 | |||
38 | Returns |
||
39 | ------- |
||
40 | diff : pandas.core.series.Series of int or float |
||
41 | Pandas series of size N - n, where N is the original size of dataframe. |
||
42 | |||
43 | Examples |
||
44 | -------- |
||
45 | >>> df = np.ones((5, 10)) |
||
46 | >>> nth_diff(df) |
||
47 | array([[0., 0., 0., 0., 0., 0., 0., 0., 0.], |
||
48 | [0., 0., 0., 0., 0., 0., 0., 0., 0.], |
||
49 | [0., 0., 0., 0., 0., 0., 0., 0., 0.], |
||
50 | [0., 0., 0., 0., 0., 0., 0., 0., 0.], |
||
51 | [0., 0., 0., 0., 0., 0., 0., 0., 0.]]) |
||
52 | |||
53 | >>> df = np.ones((5, 10)) |
||
54 | >>> nth_diff (df) |
||
55 | array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], |
||
56 | [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], |
||
57 | [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], |
||
58 | [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]) |
||
59 | |||
60 | """ |
||
61 | |||
62 | assert isinstance(n, int), "n must be an integer." |
||
63 | |||
64 | if dataframe.ndim == 1: |
||
65 | length = dataframe.shape[0] |
||
66 | if n <= length: |
||
67 | test1 = dataframe[:-n].reset_index(drop=True) |
||
68 | test2 = dataframe[n:].reset_index(drop=True) |
||
69 | diff = test2 - test1 |
||
70 | else: |
||
71 | diff = np.array([np.nan, np.nan]) |
||
72 | else: |
||
73 | length = dataframe.shape[0] |
||
74 | if n <= length: |
||
75 | if axis == 0: |
||
76 | test1 = dataframe[:-n, :] |
||
77 | test2 = dataframe[n:, :] |
||
78 | else: |
||
79 | test1 = dataframe[:, :-n] |
||
80 | test2 = dataframe[:, n:] |
||
81 | diff = test2 - test1 |
||
82 | else: |
||
83 | diff = np.array([np.nan, np.nan]) |
||
84 | |||
85 | return diff |
||
86 | |||
87 | |||
88 | View Code Duplication | def msd_calc(track, length=10): |
|
89 | """Calculates mean squared displacement of input track. |
||
90 | |||
91 | Returns numpy array containing MSD data calculated from an individual track. |
||
92 | |||
93 | Parameters |
||
94 | ---------- |
||
95 | track : pandas.core.frame.DataFrame |
||
96 | Contains, at a minimum a 'Frame', 'X', and 'Y' column |
||
97 | |||
98 | Returns |
||
99 | ------- |
||
100 | new_track : pandas.core.frame.DataFrame |
||
101 | Similar to input track. All missing frames of individual trajectories |
||
102 | are filled in with NaNs, and two new columns, MSDs and Gauss are added: |
||
103 | MSDs, calculated mean squared displacements using the formula |
||
104 | MSD = <(xpos-x0)**2> |
||
105 | Gauss, calculated Gaussianity |
||
106 | |||
107 | Examples |
||
108 | -------- |
||
109 | >>> data1 = {'Frame': [1, 2, 3, 4, 5], |
||
110 | ... 'X': [5, 6, 7, 8, 9], |
||
111 | ... 'Y': [6, 7, 8, 9, 10]} |
||
112 | >>> df = pd.DataFrame(data=data1) |
||
113 | >>> new_track = msd.msd_calc(df, 5) |
||
114 | |||
115 | >>> data1 = {'Frame': [1, 2, 3, 4, 5], |
||
116 | ... 'X': [5, 6, 7, 8, 9], |
||
117 | ... 'Y': [6, 7, 8, 9, 10]} |
||
118 | >>> df = pd.DataFrame(data=data1) |
||
119 | >>> new_track = msd.msd_calc(df) |
||
120 | |||
121 | """ |
||
122 | |||
123 | meansd = np.zeros(length) |
||
124 | gauss = np.zeros(length) |
||
125 | new_frame = np.linspace(1, length, length) |
||
126 | old_frame = track['Frame'] |
||
127 | oldxy = [track['X'], track['Y']] |
||
128 | fxy = [interpolate.interp1d(old_frame, oldxy[0], bounds_error=False, |
||
129 | fill_value=np.nan), |
||
130 | interpolate.interp1d(old_frame, oldxy[1], bounds_error=False, |
||
131 | fill_value=np.nan)] |
||
132 | |||
133 | intxy = [ma.masked_equal(fxy[0](new_frame), np.nan), |
||
134 | ma.masked_equal(fxy[1](new_frame), np.nan)] |
||
135 | data1 = {'Frame': new_frame, |
||
136 | 'X': intxy[0], |
||
137 | 'Y': intxy[1] |
||
138 | } |
||
139 | new_track = pd.DataFrame(data=data1) |
||
140 | |||
141 | for frame in range(0, length-1): |
||
142 | xy = [np.square(nth_diff(new_track['X'], n=frame+1)), |
||
143 | np.square(nth_diff(new_track['Y'], n=frame+1))] |
||
144 | with warnings.catch_warnings(): |
||
145 | warnings.simplefilter("ignore", category=RuntimeWarning) |
||
146 | meansd[frame+1] = np.nanmean(xy[0] + xy[1]) |
||
147 | gauss[frame+1] = np.nanmean(xy[0]**2 + xy[1]**2 |
||
148 | )/(2*(meansd[frame+1]**2)) |
||
149 | |||
150 | new_track['MSDs'] = pd.Series(meansd, index=new_track.index) |
||
151 | new_track['Gauss'] = pd.Series(gauss, index=new_track.index) |
||
152 | |||
153 | return new_track |
||
154 | |||
155 | |||
156 | View Code Duplication | def all_msds(data): |
|
157 | """Calculates mean squared displacements of a trajectory dataset |
||
158 | |||
159 | Returns numpy array containing MSD data of all tracks in a trajectory |
||
160 | pandas dataframe. |
||
161 | |||
162 | Parameters |
||
163 | ---------- |
||
164 | data : pandas.core.frame.DataFrame |
||
165 | Contains, at a minimum a 'Frame', 'Track_ID', 'X', and |
||
166 | 'Y' column. Note: it is assumed that frames begins at 1, not 0 with this |
||
167 | function. Adjust before feeding into function. |
||
168 | |||
169 | Returns |
||
170 | ------- |
||
171 | new_data : pandas.core.frame.DataFrame |
||
172 | Similar to input data. All missing frames of individual trajectories |
||
173 | are filled in with NaNs, and two new columns, MSDs and Gauss are added: |
||
174 | MSDs, calculated mean squared displacements using the formula |
||
175 | MSD = <(xpos-x0)**2> |
||
176 | Gauss, calculated Gaussianity |
||
177 | |||
178 | Examples |
||
179 | -------- |
||
180 | >>> data1 = {'Frame': [1, 2, 3, 4, 5, 1, 2, 3, 4, 5], |
||
181 | ... 'Track_ID': [1, 1, 1, 1, 1, 2, 2, 2, 2, 2], |
||
182 | ... 'X': [5, 6, 7, 8, 9, 1, 2, 3, 4, 5], |
||
183 | ... 'Y': [6, 7, 8, 9, 10, 2, 3, 4, 5, 6]} |
||
184 | >>> df = pd.DataFrame(data=data1) |
||
185 | >>> all_msds(df) |
||
186 | |||
187 | """ |
||
188 | |||
189 | trackids = data.Track_ID.unique() |
||
190 | partcount = trackids.shape[0] |
||
191 | length = int(max(data['Frame'])) |
||
192 | new = {} |
||
193 | new['length'] = partcount*length |
||
194 | new['frame'] = np.zeros(new['length']) |
||
195 | new['ID'] = np.zeros(new['length']) |
||
196 | new['xy'] = [np.zeros(new['length']), |
||
197 | np.zeros(new['length'])] |
||
198 | meansd = np.zeros(new['length']) |
||
199 | gauss = np.zeros(new['length']) |
||
200 | |||
201 | for particle in range(0, partcount): |
||
202 | single_track = data.loc[data['Track_ID'] == |
||
203 | trackids[particle] |
||
204 | ].sort_values(['Track_ID', 'Frame'], |
||
205 | ascending=[1, 1] |
||
206 | ).reset_index(drop=True) |
||
207 | if particle == 0: |
||
208 | index1 = 0 |
||
209 | index2 = length |
||
210 | else: |
||
211 | index1 = index2 |
||
212 | index2 = index2 + length |
||
213 | new['single_track'] = msd_calc(single_track, length=length) |
||
214 | new['frame'][index1:index2] = np.linspace(1, length, length) |
||
215 | new['ID'][index1:index2] = particle+1 |
||
216 | new['xy'][0][index1:index2] = new['single_track']['X'] |
||
217 | new['xy'][1][index1:index2] = new['single_track']['Y'] |
||
218 | meansd[index1:index2] = new['single_track']['MSDs'] |
||
219 | gauss[index1:index2] = new['single_track']['Gauss'] |
||
220 | |||
221 | data1 = {'Frame': new['frame'], |
||
222 | 'Track_ID': new['ID'], |
||
223 | 'X': new['xy'][0], |
||
224 | 'Y': new['xy'][1], |
||
225 | 'MSDs': meansd, |
||
226 | 'Gauss': gauss} |
||
227 | new_data = pd.DataFrame(data=data1) |
||
228 | |||
229 | return new_data |
||
230 | |||
231 | |||
232 | View Code Duplication | def make_xyarray(data, length=651): |
|
233 | """Rearranges xy position data into 2d arrays |
||
234 | |||
235 | Rearranges xy data from input pandas dataframe into 2D numpy array. |
||
236 | |||
237 | Parameters |
||
238 | ---------- |
||
239 | data : pd.core.frame.DataFrame |
||
240 | Contains, at a minimum a 'Frame', 'Track_ID', 'X', and |
||
241 | 'Y' column. |
||
242 | length : int |
||
243 | Desired length or number of frames to which to extend trajectories. |
||
244 | Any trajectories shorter than the input length will have the extra space |
||
245 | filled in with NaNs. |
||
246 | |||
247 | Returns |
||
248 | ------- |
||
249 | xyft : dict of np.ndarray |
||
250 | Dictionary containing xy position data, frame data, and trajectory ID |
||
251 | data. Contains the following keys: |
||
252 | farray, frames data (length x particles) |
||
253 | tarray, trajectory ID data (length x particles) |
||
254 | xarray, x position data (length x particles) |
||
255 | yarray, y position data (length x particles) |
||
256 | |||
257 | Examples |
||
258 | -------- |
||
259 | >>> data1 = {'Frame': [0, 1, 2, 3, 4, 2, 3, 4, 5, 6], |
||
260 | ... 'Track_ID': [1, 1, 1, 1, 1, 2, 2, 2, 2, 2], |
||
261 | ... 'X': [5, 6, 7, 8, 9, 1, 2, 3, 4, 5], |
||
262 | ... 'Y': [6, 7, 8, 9, 10, 2, 3, 4, 5, 6]} |
||
263 | >>> df = pd.DataFrame(data=data1) |
||
264 | >>> length = max(df['Frame']) + 1 |
||
265 | >>> xyft = msd.make_xyarray(df, length=length) |
||
266 | {'farray': array([[0., 0.], |
||
267 | [1., 1.], |
||
268 | [2., 2.], |
||
269 | [3., 3.], |
||
270 | [4., 4.], |
||
271 | [5., 5.], |
||
272 | [6., 6.]]), |
||
273 | 'tarray': array([[1., 2.], |
||
274 | [1., 2.], |
||
275 | [1., 2.], |
||
276 | [1., 2.], |
||
277 | [1., 2.], |
||
278 | [1., 2.], |
||
279 | [1., 2.]]), |
||
280 | 'xarray': array([[ 5., nan], |
||
281 | [ 6., nan], |
||
282 | [ 7., 1.], |
||
283 | [ 8., 2.], |
||
284 | [ 9., 3.], |
||
285 | [nan, 4.], |
||
286 | 'yarray': [nan, 5.]]), |
||
287 | array([[ 6., nan], |
||
288 | [ 7., nan], |
||
289 | [ 8., 2.], |
||
290 | [ 9., 3.], |
||
291 | [10., 4.], |
||
292 | [nan, 5.], |
||
293 | [nan, 6.]])} |
||
294 | |||
295 | """ |
||
296 | |||
297 | # Initial values |
||
298 | first_p = int(min(data['Track_ID'])) |
||
299 | particles = int(max(data['Track_ID'])) - first_p + 1 |
||
300 | xyft = {} |
||
301 | xyft['xarray'] = np.zeros((length, particles)) |
||
302 | xyft['yarray'] = np.zeros((length, particles)) |
||
303 | xyft['farray'] = np.zeros((length, particles)) |
||
304 | xyft['tarray'] = np.zeros((length, particles)) |
||
305 | xyft['qarray'] = np.zeros((length, particles)) |
||
306 | xyft['snarray'] = np.zeros((length, particles)) |
||
307 | xyft['iarray'] = np.zeros((length, particles)) |
||
308 | |||
309 | track = data[data['Track_ID'] == first_p |
||
310 | ].sort_values(['Track_ID', 'Frame'], |
||
311 | ascending=[1, 1]).reset_index(drop=True) |
||
312 | new_frame = np.linspace(0, length-1, length) |
||
313 | |||
314 | old_frame = track['Frame'].values.astype(float) |
||
315 | oldxy = [track['X'].values, |
||
316 | track['Y'].values, |
||
317 | track['Quality'].values, |
||
318 | track['SN_Ratio'].values, |
||
319 | track['Mean_Intensity'].values] |
||
320 | fxy = [interpolate.interp1d(old_frame, oldxy[0], bounds_error=False, |
||
321 | fill_value=np.nan), |
||
322 | interpolate.interp1d(old_frame, oldxy[1], bounds_error=False, |
||
323 | fill_value=np.nan), |
||
324 | interpolate.interp1d(old_frame, oldxy[2], bounds_error=False, |
||
325 | fill_value=np.nan), |
||
326 | interpolate.interp1d(old_frame, oldxy[3], bounds_error=False, |
||
327 | fill_value=np.nan), |
||
328 | interpolate.interp1d(old_frame, oldxy[4], bounds_error=False, |
||
329 | fill_value=np.nan)] |
||
330 | |||
331 | intxy = [fxy[0](new_frame), fxy[1](new_frame), fxy[2](new_frame), |
||
332 | fxy[3](new_frame), fxy[4](new_frame)] |
||
333 | |||
334 | # Fill in entire array |
||
335 | xyft['xarray'][:, 0] = intxy[0] |
||
336 | xyft['yarray'][:, 0] = intxy[1] |
||
337 | xyft['farray'][:, 0] = new_frame |
||
338 | xyft['tarray'][:, 0] = first_p |
||
339 | xyft['qarray'][:, 0] = intxy[2] |
||
340 | xyft['snarray'][:, 0] = intxy[3] |
||
341 | xyft['iarray'][:, 0] = intxy[4] |
||
342 | |||
343 | for part in range(first_p+1, first_p+particles): |
||
344 | track = data[data['Track_ID'] == part |
||
345 | ].sort_values(['Track_ID', 'Frame'], |
||
346 | ascending=[1, 1]).reset_index(drop=True) |
||
347 | |||
348 | old_frame = track['Frame'] |
||
349 | oldxy = [track['X'].values, |
||
350 | track['Y'].values, |
||
351 | track['Quality'].values, |
||
352 | track['SN_Ratio'].values, |
||
353 | track['Mean_Intensity'].values] |
||
354 | |||
355 | fxy = [interpolate.interp1d(old_frame, oldxy[0], bounds_error=False, |
||
356 | fill_value=np.nan), |
||
357 | interpolate.interp1d(old_frame, oldxy[1], bounds_error=False, |
||
358 | fill_value=np.nan), |
||
359 | interpolate.interp1d(old_frame, oldxy[2], bounds_error=False, |
||
360 | fill_value=np.nan), |
||
361 | interpolate.interp1d(old_frame, oldxy[3], bounds_error=False, |
||
362 | fill_value=np.nan), |
||
363 | interpolate.interp1d(old_frame, oldxy[4], bounds_error=False, |
||
364 | fill_value=np.nan)] |
||
365 | |||
366 | intxy = [fxy[0](new_frame), fxy[1](new_frame), fxy[2](new_frame), |
||
367 | fxy[3](new_frame), fxy[4](new_frame)] |
||
368 | |||
369 | xyft['xarray'][:, part-first_p] = intxy[0] |
||
370 | xyft['yarray'][:, part-first_p] = intxy[1] |
||
371 | xyft['farray'][:, part-first_p] = new_frame |
||
372 | xyft['tarray'][:, part-first_p] = part |
||
373 | xyft['qarray'][:, part-first_p] = intxy[2] |
||
374 | xyft['snarray'][:, part-first_p] = intxy[3] |
||
375 | xyft['iarray'][:, part-first_p] = intxy[4] |
||
376 | |||
377 | return xyft |
||
378 | |||
379 | |||
380 | View Code Duplication | def all_msds2(data, frames=651): |
|
381 | """Calculates mean squared displacements of input trajectory dataset |
||
382 | |||
383 | Returns numpy array containing MSD data of all tracks in a trajectory pandas |
||
384 | dataframe. |
||
385 | |||
386 | Parameters |
||
387 | ---------- |
||
388 | data : pandas.core.frame.DataFrame |
||
389 | Contains, at a minimum a 'Frame', 'Track_ID', 'X', and |
||
390 | 'Y' column. Note: it is assumed that frames begins at 0. |
||
391 | |||
392 | Returns |
||
393 | ------- |
||
394 | new_data : pandas.core.frame.DataFrame |
||
395 | Similar to input data. All missing frames of individual trajectories |
||
396 | are filled in with NaNs, and two new columns, MSDs and Gauss are added: |
||
397 | MSDs, calculated mean squared displacements using the formula |
||
398 | MSD = <(xpos-x0)**2> |
||
399 | Gauss, calculated Gaussianity |
||
400 | |||
401 | Examples |
||
402 | -------- |
||
403 | >>> data1 = {'Frame': [0, 1, 2, 3, 4, 0, 1, 2, 3, 4], |
||
404 | ... 'Track_ID': [1, 1, 1, 1, 1, 2, 2, 2, 2, 2], |
||
405 | ... 'X': [5, 6, 7, 8, 9, 1, 2, 3, 4, 5], |
||
406 | ... 'Y': [6, 7, 8, 9, 10, 2, 3, 4, 5, 6]} |
||
407 | >>> df = pd.DataFrame(data=data1) |
||
408 | >>> cols = ['Frame', 'Track_ID', 'X', 'Y', 'MSDs', 'Gauss'] |
||
409 | >>> om flength = max(df['Frame']) + 1 |
||
410 | >>> msd.all_msds2(df, frames=length)[cols] |
||
411 | |||
412 | """ |
||
413 | if data.shape[0] > 2: |
||
414 | try: |
||
415 | xyft = make_xyarray(data, length=frames) |
||
416 | length = xyft['xarray'].shape[0] |
||
417 | particles = xyft['xarray'].shape[1] |
||
418 | |||
419 | meansd = np.zeros((length, particles)) |
||
420 | gauss = np.zeros((length, particles)) |
||
421 | |||
422 | for frame in range(0, length-1): |
||
423 | xpos = np.square(nth_diff(xyft['xarray'], n=frame+1)) |
||
424 | ypos = np.square(nth_diff(xyft['yarray'], n=frame+1)) |
||
425 | |||
426 | with warnings.catch_warnings(): |
||
427 | warnings.simplefilter("ignore", category=RuntimeWarning) |
||
428 | meansd[frame+1, :] = np.nanmean(xpos + ypos, axis=0) |
||
429 | gauss[frame+1, :] = np.nanmean(xpos**2 + ypos**2, axis=0 |
||
430 | )/(2*(meansd[frame+1]**2)) |
||
431 | |||
432 | data1 = {'Frame': xyft['farray'].flatten('F'), |
||
433 | 'Track_ID': xyft['tarray'].flatten('F'), |
||
434 | 'X': xyft['xarray'].flatten('F'), |
||
435 | 'Y': xyft['yarray'].flatten('F'), |
||
436 | 'MSDs': meansd.flatten('F'), |
||
437 | 'Gauss': gauss.flatten('F'), |
||
438 | 'Quality': xyft['qarray'].flatten('F'), |
||
439 | 'SN_Ratio': xyft['snarray'].flatten('F'), |
||
440 | 'Mean_Intensity': xyft['iarray'].flatten('F')} |
||
441 | |||
442 | new_data = pd.DataFrame(data=data1) |
||
443 | except ValueError: |
||
444 | data1 = {'Frame': [], |
||
445 | 'Track_ID': [], |
||
446 | 'X': [], |
||
447 | 'Y': [], |
||
448 | 'MSDs': [], |
||
449 | 'Gauss': [], |
||
450 | 'Quality': [], |
||
451 | 'SN_Ratio': [], |
||
452 | 'Mean_Intensity': []} |
||
453 | new_data = pd.DataFrame(data=data1) |
||
454 | except IndexError: |
||
455 | data1 = {'Frame': [], |
||
456 | 'Track_ID': [], |
||
457 | 'X': [], |
||
458 | 'Y': [], |
||
459 | 'MSDs': [], |
||
460 | 'Gauss': [], |
||
461 | 'Quality': [], |
||
462 | 'SN_Ratio': [], |
||
463 | 'Mean_Intensity': []} |
||
464 | new_data = pd.DataFrame(data=data1) |
||
465 | else: |
||
466 | data1 = {'Frame': [], |
||
467 | 'Track_ID': [], |
||
468 | 'X': [], |
||
469 | 'Y': [], |
||
470 | 'MSDs': [], |
||
471 | 'Gauss': [], |
||
472 | 'Quality': [], |
||
473 | 'SN_Ratio': [], |
||
474 | 'Mean_Intensity': []} |
||
475 | new_data = pd.DataFrame(data=data1) |
||
476 | |||
477 | return new_data |
||
478 | |||
479 | |||
480 | View Code Duplication | def geomean_msdisp(prefix, umppx=0.16, fps=100.02, upload=True, |
|
481 | remote_folder="01_18_Experiment", bucket='ccurtis.data', |
||
482 | backup_frames=651): |
||
483 | """Comptes geometric averages of mean squared displacement datasets |
||
484 | |||
485 | Calculates geometric averages and stadard errors for MSD datasets. Might |
||
486 | error out if not formatted as output from all_msds2. |
||
487 | |||
488 | Parameters |
||
489 | ---------- |
||
490 | prefix : string |
||
491 | Prefix of file name to be plotted e.g. features_P1.csv prefix is P1. |
||
492 | umppx : float |
||
493 | Microns per pixel of original images. |
||
494 | fps : float |
||
495 | Frames per second of video. |
||
496 | upload : bool |
||
497 | True if you want to upload to s3. |
||
498 | remote_folder : string |
||
499 | Folder in S3 bucket to upload to. |
||
500 | bucket : string |
||
501 | Name of S3 bucket to upload to. |
||
502 | |||
503 | Returns |
||
504 | ------- |
||
505 | geo_mean : numpy.ndarray |
||
506 | Geometric mean of trajectory MSDs at all time points. |
||
507 | geo_stder : numpy.ndarray |
||
508 | Geometric standard errot of trajectory MSDs at all time points. |
||
509 | |||
510 | """ |
||
511 | |||
512 | merged = pd.read_csv('msd_{}.csv'.format(prefix)) |
||
513 | try: |
||
514 | particles = int(max(merged['Track_ID'])) |
||
515 | frames = int(max(merged['Frame'])) |
||
516 | ypos = np.zeros((particles+1, frames+1)) |
||
517 | |||
518 | for i in range(0, particles+1): |
||
519 | ypos[i, :] = merged.loc[merged.Track_ID == i, 'MSDs']*umppx*umppx |
||
520 | xpos = merged.loc[merged.Track_ID == i, 'Frame']/fps |
||
521 | |||
522 | geo_mean = np.nanmean(ma.log(ypos), axis=0) |
||
523 | geo_stder = ma.masked_equal(stats.sem(ma.log(ypos), axis=0, |
||
524 | nan_policy='omit'), 0.0) |
||
525 | |||
526 | except ValueError: |
||
527 | geo_mean = np.nan*np.ones(backup_frames) |
||
528 | geo_stder = np.nan*np.ones(backup_frames) |
||
529 | |||
530 | np.savetxt('geomean_{}.csv'.format(prefix), geo_mean, delimiter=",") |
||
531 | np.savetxt('geoSEM_{}.csv'.format(prefix), geo_stder, delimiter=",") |
||
532 | |||
533 | if upload: |
||
534 | aws.upload_s3('geomean_{}.csv'.format(prefix), |
||
535 | remote_folder+'/'+'geomean_{}.csv'.format(prefix), |
||
536 | bucket_name=bucket) |
||
537 | aws.upload_s3('geoSEM_{}.csv'.format(prefix), |
||
538 | remote_folder+'/'+'geoSEM_{}.csv'.format(prefix), |
||
539 | bucket_name=bucket) |
||
540 | |||
541 | return geo_mean, geo_stder |
||
542 | |||
543 | |||
544 | View Code Duplication | def binning(experiments, wells=4, prefix='test'): |
|
545 | """Split set of input experiments into groups. |
||
546 | |||
547 | Parameters |
||
548 | ---------- |
||
549 | experiments : list of str |
||
550 | List of experiment names. |
||
551 | wells : int |
||
552 | Number of groups to divide experiments into. |
||
553 | |||
554 | Returns |
||
555 | ------- |
||
556 | slices : int |
||
557 | Number of experiments per group. |
||
558 | bins : dict of list of str |
||
559 | Dictionary, keys corresponding to group names, and elements containing |
||
560 | lists of experiments in each group. |
||
561 | bin_names : list of str |
||
562 | List of group names |
||
563 | |||
564 | """ |
||
565 | |||
566 | total_videos = len(experiments) |
||
567 | bins = {} |
||
568 | slices = int(total_videos/wells) |
||
569 | bin_names = [] |
||
570 | |||
571 | for num in range(0, wells): |
||
572 | slice1 = num*slices |
||
573 | slice2 = (num+1)*(slices) |
||
574 | pref = '{}_W{}'.format(prefix, num) |
||
575 | bins[pref] = experiments[slice1:slice2] |
||
576 | bin_names.append(pref) |
||
577 | return slices, bins, bin_names |
||
578 | |||
579 | |||
580 | View Code Duplication | def precision_weight(group, geo_stder): |
|
581 | """Calculates precision-based weights from input standard error data |
||
582 | |||
583 | Calculates precision weights to be used in precision-averaged MSD |
||
584 | calculations. |
||
585 | |||
586 | Parameters |
||
587 | ---------- |
||
588 | group : list of str |
||
589 | List of experiment names to average. Each element corresponds to a key |
||
590 | in geo_stder and geomean. |
||
591 | geo_stder : dict of numpy.ndarray |
||
592 | Each entry in dictionary corresponds to the standard errors of an MSD |
||
593 | profile, the key corresponding to an experiment name. |
||
594 | |||
595 | Returns |
||
596 | ------- |
||
597 | weights: numpy.ndarray |
||
598 | Precision weights to be used in precision averaging. |
||
599 | w_holder : numpy.ndarray |
||
600 | Precision values of each video at each time point. |
||
601 | |||
602 | """ |
||
603 | |||
604 | frames = np.shape(geo_stder[group[0]])[0] |
||
605 | slices = len(group) |
||
606 | video_counter = 0 |
||
607 | w_holder = np.zeros((slices, frames)) |
||
608 | for sample in group: |
||
609 | w_holder[video_counter, :] = 1/(geo_stder[sample]*geo_stder[sample]) |
||
610 | video_counter = video_counter + 1 |
||
611 | |||
612 | w_holder = ma.masked_equal(w_holder, 0.0) |
||
613 | w_holder = ma.masked_equal(w_holder, 1.0) |
||
614 | |||
615 | weights = ma.sum(w_holder, axis=0) |
||
616 | |||
617 | return weights, w_holder |
||
618 | |||
619 | |||
620 | View Code Duplication | def precision_averaging(group, geomean, geo_stder, weights, save=True, |
|
621 | bucket='ccurtis.data', folder='test', |
||
622 | experiment='test'): |
||
623 | """Calculates precision-weighted averages of MSD datasets. |
||
624 | |||
625 | Parameters |
||
626 | ---------- |
||
627 | group : list of str |
||
628 | List of experiment names to average. Each element corresponds to a key |
||
629 | in geo_stder and geomean. |
||
630 | geomean : dict of numpy.ndarray |
||
631 | Each entry in dictionary corresponds to an MSD profiles, they key |
||
632 | corresponding to an experiment name. |
||
633 | geo_stder : dict of numpy.ndarray |
||
634 | Each entry in dictionary corresponds to the standard errors of an MSD |
||
635 | profile, the key corresponding to an experiment name. |
||
636 | weights : numpy.ndarray |
||
637 | Precision weights to be used in precision averaging. |
||
638 | |||
639 | Returns |
||
640 | ------- |
||
641 | geo : numpy.ndarray |
||
642 | Precision-weighted averaged MSDs from experiments specified in group |
||
643 | geo_stder : numpy.ndarray |
||
644 | Precision-weighted averaged SEMs from experiments specified in group |
||
645 | |||
646 | """ |
||
647 | |||
648 | frames = np.shape(geo_stder[group[0]])[0] |
||
649 | slices = len(group) |
||
650 | |||
651 | video_counter = 0 |
||
652 | geo_holder = np.zeros((slices, frames)) |
||
653 | gstder_holder = np.zeros((slices, frames)) |
||
654 | w_holder = np.zeros((slices, frames)) |
||
655 | for sample in group: |
||
656 | w_holder[video_counter, :] = (1/(geo_stder[sample]*geo_stder[sample]) |
||
657 | )/weights |
||
658 | geo_holder[video_counter, :] = w_holder[video_counter, : |
||
659 | ] * geomean[sample] |
||
660 | gstder_holder[video_counter, :] = 1/(geo_stder[sample]*geo_stder[sample] |
||
661 | ) |
||
662 | video_counter = video_counter + 1 |
||
663 | |||
664 | w_holder = ma.masked_equal(w_holder, 0.0) |
||
665 | w_holder = ma.masked_equal(w_holder, 1.0) |
||
666 | geo_holder = ma.masked_equal(geo_holder, 0.0) |
||
667 | geo_holder = ma.masked_equal(geo_holder, 1.0) |
||
668 | gstder_holder = ma.masked_equal(gstder_holder, 0.0) |
||
669 | gstder_holder = ma.masked_equal(gstder_holder, 1.0) |
||
670 | |||
671 | geo = ma.sum(geo_holder, axis=0) |
||
672 | geo_stder = ma.sqrt((1/ma.sum(gstder_holder, axis=0))) |
||
673 | |||
674 | if save: |
||
675 | geo_f = 'geomean_{}.csv'.format(experiment) |
||
676 | gstder_f = 'geoSEM_{}.csv'.format(experiment) |
||
677 | np.savetxt(geo_f, geo, delimiter=',') |
||
678 | np.savetxt(gstder_f, geo_stder, delimiter=',') |
||
679 | aws.upload_s3(geo_f, '{}/{}'.format(folder, geo_f), bucket_name=bucket) |
||
680 | aws.upload_s3(gstder_f, '{}/{}'.format(folder, gstder_f), |
||
681 | bucket_name=bucket) |
||
682 | |||
683 | geodata = Bunch(geomean=geo, geostd=geo_stder, weighthold=w_holder, |
||
684 | geostdhold=gstder_holder) |
||
685 | |||
686 | return geodata |
||
687 | |||
688 | |||
689 | View Code Duplication | def plot_all_experiments(experiments, bucket='ccurtis.data', folder='test', |
|
690 | yrange=(10**-1, 10**1), fps=100.02, |
||
691 | xrange=(10**-2, 10**0), upload=True, |
||
692 | outfile='test.png', exponential=True): |
||
693 | """Plots precision-weighted averages of MSD datasets. |
||
694 | |||
695 | Plots pre-calculated precision-weighted averages of MSD datasets calculated |
||
696 | from precision_averaging and stored in an AWS S3 bucket. |
||
697 | |||
698 | Parameters |
||
699 | ---------- |
||
700 | group : list of str |
||
701 | List of experiment names to plot. Each experiment must have an MSD and |
||
702 | SEM file associated with it in s3. |
||
703 | bucket : str |
||
704 | S3 bucket from which to download data. |
||
705 | folder : str |
||
706 | Folder in s3 bucket from which to download data. |
||
707 | yrange : list of float |
||
708 | Y range of plot |
||
709 | xrange: list of float |
||
710 | X range of plot |
||
711 | upload : bool |
||
712 | True to upload to S3 |
||
713 | outfile : str |
||
714 | Filename of output image |
||
715 | |||
716 | """ |
||
717 | |||
718 | n = len(experiments) |
||
719 | |||
720 | color = iter(cm.viridis(np.linspace(0, 0.9, n))) |
||
721 | |||
722 | fig = plt.figure(figsize=(8.5, 8.5)) |
||
723 | plt.xlim(xrange[0], xrange[1]) |
||
724 | plt.ylim(yrange[0], yrange[1]) |
||
725 | plt.xlabel('Tau (s)', fontsize=25) |
||
726 | plt.ylabel(r'Mean Squared Displacement ($\mu$m$^2$)', fontsize=25) |
||
727 | |||
728 | geo = {} |
||
729 | gstder = {} |
||
730 | counter = 0 |
||
731 | for experiment in experiments: |
||
732 | aws.download_s3('{}/geomean_{}.csv'.format(folder, experiment), |
||
733 | 'geomean_{}.csv'.format(experiment), bucket_name=bucket) |
||
734 | aws.download_s3('{}/geoSEM_{}.csv'.format(folder, experiment), |
||
735 | 'geoSEM_{}.csv'.format(experiment), bucket_name=bucket) |
||
736 | |||
737 | geo[counter] = np.genfromtxt('geomean_{}.csv'.format(experiment)) |
||
738 | gstder[counter] = np.genfromtxt('geoSEM_{}.csv'.format(experiment)) |
||
739 | geo[counter] = ma.masked_equal(geo[counter], 0.0) |
||
740 | gstder[counter] = ma.masked_equal(gstder[counter], 0.0) |
||
741 | |||
742 | frames = np.shape(gstder[counter])[0] |
||
743 | xpos = np.linspace(0, frames-1, frames)/fps |
||
744 | c = next(color) |
||
745 | |||
746 | if exponential: |
||
747 | plt.loglog(xpos, np.exp(geo[counter]), c=c, linewidth=6, |
||
748 | label=experiment) |
||
749 | plt.loglog(xpos, np.exp(geo[counter] - 1.96*gstder[counter]), |
||
750 | c=c, dashes=[6, 2], linewidth=4) |
||
751 | plt.loglog(xpos, np.exp(geo[counter] + 1.96*gstder[counter]), |
||
752 | c=c, dashes=[6, 2], linewidth=4) |
||
753 | else: |
||
754 | plt.loglog(xpos, geo[counter], c=c, linewidth=6, |
||
755 | label=experiment) |
||
756 | plt.loglog(xpos, geo[counter] - 1.96*gstder[counter], c=c, |
||
757 | dashes=[6, 2], linewidth=4) |
||
758 | plt.loglog(xpos, geo[counter] + 1.96*gstder[counter], c=c, |
||
759 | dashes=[6, 2], linewidth=4) |
||
760 | |||
761 | counter = counter + 1 |
||
762 | |||
763 | plt.legend(frameon=False, prop={'size': 16}) |
||
764 | |||
765 | if upload: |
||
766 | fig.savefig(outfile, bbox_inches='tight') |
||
767 | aws.upload_s3(outfile, folder+'/'+outfile, bucket_name=bucket) |
||
768 | |||
769 | |||
770 | View Code Duplication | def random_walk(nsteps=100, seed=1, start=(0, 0)): |
|
771 | """Creates 2d random walk trajectory. |
||
772 | |||
773 | Parameters |
||
774 | ---------- |
||
775 | nsteps : int |
||
776 | Number of steps for trajectory to move. |
||
777 | seed : int |
||
778 | Seed for pseudo-random number generator for reproducability. |
||
779 | start : tuple of int or float |
||
780 | Starting xy coordinates at which the random walk begins. |
||
781 | |||
782 | Returns |
||
783 | ------- |
||
784 | x : numpy.ndarray |
||
785 | Array of x coordinates of random walk. |
||
786 | y : numpy.ndarray |
||
787 | Array of y coordinates of random walk. |
||
788 | |||
789 | """ |
||
790 | |||
791 | rand.seed(a=seed) |
||
792 | |||
793 | x = np.zeros(nsteps) |
||
794 | y = np.zeros(nsteps) |
||
795 | x[0] = start[0] |
||
796 | y[0] = start[1] |
||
797 | |||
798 | for i in range(1, nsteps): |
||
799 | val = rand.randint(1, 4) |
||
800 | if val == 1: |
||
801 | x[i] = x[i - 1] + 1 |
||
802 | y[i] = y[i - 1] |
||
803 | elif val == 2: |
||
804 | x[i] = x[i - 1] - 1 |
||
805 | y[i] = y[i - 1] |
||
806 | elif val == 3: |
||
807 | x[i] = x[i - 1] |
||
808 | y[i] = y[i - 1] + 1 |
||
809 | else: |
||
810 | x[i] = x[i - 1] |
||
811 | y[i] = y[i - 1] - 1 |
||
812 | |||
813 | return x, y |
||
814 | |||
815 | |||
816 | View Code Duplication | def random_traj_dataset(nframes=100, nparts=30, seed=1, fsize=(0, 512), |
|
817 | ndist=(1, 2)): |
||
818 | """Creates a random population of random walks. |
||
819 | |||
820 | Parameters |
||
821 | ---------- |
||
822 | nframes : int |
||
823 | Number of frames for each random trajectory. |
||
824 | nparts : int |
||
825 | Number of particles in trajectory dataset. |
||
826 | seed : int |
||
827 | Seed for pseudo-random number generator for reproducability. |
||
828 | fsize : tuple of int or float |
||
829 | Scope of points over which particles may start at. |
||
830 | ndist : tuple of int or float |
||
831 | Parameters to generate normal distribution, mu and sigma. |
||
832 | |||
833 | Returns |
||
834 | ------- |
||
835 | dataf : pandas.core.frame.DataFrame |
||
836 | Trajectory data containing a 'Frame', 'Track_ID', 'X', and |
||
837 | 'Y' column. |
||
838 | |||
839 | """ |
||
840 | |||
841 | frames = [] |
||
842 | trackid = [] |
||
843 | x = [] |
||
844 | y = [] |
||
845 | start = [0, 0] |
||
846 | pseed = seed |
||
847 | |||
848 | for i in range(nparts): |
||
849 | rand.seed(a=i+pseed) |
||
850 | start[0] = rand.randint(fsize[0], fsize[1]) |
||
851 | rand.seed(a=i+3+pseed) |
||
852 | start[1] = rand.randint(fsize[0], fsize[1]) |
||
853 | rand.seed(a=i+5+pseed) |
||
854 | weight = rand.normalvariate(mu=ndist[0], sigma=ndist[1]) |
||
855 | |||
856 | trackid = np.append(trackid, np.array([i]*nframes)) |
||
857 | xi, yi = random_walk(nsteps=nframes, seed=i) |
||
858 | x = np.append(x, weight*xi+start[0]) |
||
859 | y = np.append(y, weight*yi+start[1]) |
||
860 | frames = np.append(frames, np.linspace(0, nframes-1, nframes)) |
||
861 | |||
862 | datai = {'Frame': frames, |
||
863 | 'Track_ID': trackid, |
||
864 | 'X': x, |
||
865 | 'Y': y, |
||
866 | 'Quality': nframes*nparts*[10], |
||
867 | 'SN_Ratio': nframes*nparts*[0.1], |
||
868 | 'Mean_Intensity': nframes*nparts*[120]} |
||
869 | dataf = pd.DataFrame(data=datai) |
||
870 | |||
871 | return dataf |
||
872 | |||
873 | |||
874 | class Bunch: |
||
875 | def __init__(self, **kwds): |
||
876 | self.__dict__.update(kwds) |
||
877 |