Total Complexity | 46 |
Total Lines | 745 |
Duplicated Lines | 95.44 % |
Changes | 0 |
Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems, and corresponding solutions are:
Complex classes like diff_classifier.features often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
1 | import numpy as np |
||
|
|||
2 | import pandas as pd |
||
3 | import numpy.ma as ma |
||
4 | from scipy.optimize import curve_fit |
||
5 | import numpy.linalg as LA |
||
6 | import math |
||
7 | import struct |
||
8 | import sys |
||
9 | |||
10 | import diff_classifier.utils as ut |
||
11 | import diff_classifier.msd as msd |
||
12 | |||
13 | |||
14 | View Code Duplication | def unmask_track(track): |
|
15 | """ |
||
16 | Removes empty frames from a track in an MSD pandas dataframe. |
||
17 | |||
18 | Parameters |
||
19 | ---------- |
||
20 | track : pandas Dataframe |
||
21 | At a minimum, must contain a Frame, Track_ID, X, Y, MSDs, and |
||
22 | Gauss column. |
||
23 | |||
24 | Returns |
||
25 | ------- |
||
26 | comp_track : pandas Dataframe |
||
27 | Similar to track, but has all masked components removed. |
||
28 | |||
29 | Examples |
||
30 | -------- |
||
31 | |||
32 | """ |
||
33 | x = ma.masked_invalid(track['X']) |
||
34 | msd = ma.masked_invalid(track['MSDs']) |
||
35 | x_mask = ma.getmask(x) |
||
36 | msd_mask = ma.getmask(msd) |
||
37 | comp_frame = ma.compressed(ma.masked_where(msd_mask, track['Frame'])) |
||
38 | comp_ID = ma.compressed(ma.masked_where(msd_mask, track['Track_ID'])) |
||
39 | comp_x = ma.compressed(ma.masked_where(x_mask, track['X'])) |
||
40 | comp_y = ma.compressed(ma.masked_where(x_mask, track['Y'])) |
||
41 | comp_msd = ma.compressed(ma.masked_where(msd_mask, track['MSDs'])) |
||
42 | comp_gauss = ma.compressed(ma.masked_where(msd_mask, track['Gauss'])) |
||
43 | |||
44 | d = {'Frame': comp_frame, |
||
45 | 'Track_ID': comp_ID, |
||
46 | 'X': comp_x, |
||
47 | 'Y': comp_y, |
||
48 | 'MSDs': comp_msd, |
||
49 | 'Gauss': comp_gauss |
||
50 | } |
||
51 | comp_track = pd.DataFrame(data=d) |
||
52 | return comp_track |
||
53 | |||
54 | |||
55 | View Code Duplication | def alpha_calc(track): |
|
56 | """ |
||
57 | Calculates the parameter alpha by fitting track MSD data to a function. |
||
58 | |||
59 | Parameters |
||
60 | ---------- |
||
61 | track : pandas DataFrame |
||
62 | At a minimum, must contain a Frames and a MSDs column. The function |
||
63 | msd_calc can be used to generate the correctly formatted pd dataframe. |
||
64 | |||
65 | Returns |
||
66 | ------- |
||
67 | a : numpy.float64 |
||
68 | The anomalous exponent derived by fitting MSD values to the function, |
||
69 | <r**2(n)> = 4*D*(n*delt)**a |
||
70 | D : numpy.float64 |
||
71 | The fitted diffusion coefficient derived by fitting MSD values to the |
||
72 | function above. |
||
73 | |||
74 | Examples |
||
75 | -------- |
||
76 | >>> frames = 5 |
||
77 | >>> d = {'Frame': np.linspace(1, frames, frames), |
||
78 | 'X': np.linspace(1, frames, frames)+5, |
||
79 | 'Y': np.linspace(1, frames, frames)+3} |
||
80 | >>> df = pd.DataFrame(data=d) |
||
81 | >>> df['MSDs'], df['Gauss'] = msd_calc(df) |
||
82 | >>> alpha_calc(df) |
||
83 | (2.0000000000000004, 0.4999999999999999) |
||
84 | |||
85 | >>> frames = 10 |
||
86 | >>> d = {'Frame': np.linspace(1, frames, frames), |
||
87 | 'X': np.sin(np.linspace(1, frames, frames)+3), |
||
88 | 'Y': np.cos(np.linspace(1, frames, frames)+3)} |
||
89 | >>> df = pd.DataFrame(data=d) |
||
90 | >>> df['MSDs'], df['Gauss'] = msd_calc(df) |
||
91 | >>> alpha_calc(df) |
||
92 | (0.023690002018364065, 0.5144436515510022) |
||
93 | """ |
||
94 | |||
95 | # assert type(track) == pd.core.frame.DataFrame, "track must be a pandas dataframe." |
||
96 | # assert type(track['MSDs']) == pd.core.series.Series, "track must contain MSDs column." |
||
97 | # assert type(track['Frame']) == pd.core.series.Series, "track must contain Frame column." |
||
98 | # assert track.shape[0] > 0, "track must not be empty." |
||
99 | |||
100 | y = track['MSDs'] |
||
101 | x = track['Frame'] |
||
102 | |||
103 | def msd_alpha(x, a, D): |
||
104 | return 4*D*(x**a) |
||
105 | |||
106 | try: |
||
107 | popt, pcov = curve_fit(msd_alpha, x, y) |
||
108 | a = popt[0] |
||
109 | D = popt[1] |
||
110 | except RuntimeError: |
||
111 | print('Optimal parameters not found. Print NaN instead.') |
||
112 | a = np.nan |
||
113 | D = np.nan |
||
114 | return a, D |
||
115 | |||
116 | |||
117 | View Code Duplication | def gyration_tensor(track): |
|
118 | """ |
||
119 | Calculates the eigenvalues and eigenvectors of the gyration tensor of the |
||
120 | input trajectory. |
||
121 | |||
122 | Parameters |
||
123 | ---------- |
||
124 | track : pandas DataFrame |
||
125 | At a minimum, must contain an X and Y column. The function |
||
126 | msd_calc can be used to generate the correctly formatted pd dataframe. |
||
127 | |||
128 | Returns |
||
129 | ------- |
||
130 | l1 : numpy.float64 |
||
131 | Dominant eigenvalue of the gyration tensor. |
||
132 | l2 : numpy.float64 |
||
133 | Secondary eigenvalue of the gyration tensor. |
||
134 | v1 : 2 x 1 numpy.ndarray of numpy.float64 objects |
||
135 | Dominant eigenvector of the gyration tensor. |
||
136 | v2 : 2 x 1 numpy.ndarray of numpy.float64 objects |
||
137 | Secondary eigenvector of the gyration tensor. |
||
138 | |||
139 | Examples |
||
140 | -------- |
||
141 | >>> frames = 5 |
||
142 | >>> d = {'Frame': np.linspace(1, frames, frames), |
||
143 | 'X': np.linspace(1, frames, frames)+5, |
||
144 | 'Y': np.linspace(1, frames, frames)+3} |
||
145 | >>> df = pd.DataFrame(data=d) |
||
146 | >>> df['MSDs'], df['Gauss'] = msd_calc(df) |
||
147 | >>> gyration_tensor(df) |
||
148 | (4.0, |
||
149 | 4.4408920985006262e-16, |
||
150 | array([ 0.70710678, -0.70710678]), |
||
151 | array([ 0.70710678, 0.70710678])) |
||
152 | |||
153 | >>> frames = 10 |
||
154 | >>> d = {'Frame': np.linspace(1, frames, frames), |
||
155 | 'X': np.sin(np.linspace(1, frames, frames)+3), |
||
156 | 'Y': np.cos(np.linspace(1, frames, frames)+3)} |
||
157 | >>> df = pd.DataFrame(data=d) |
||
158 | >>> df['MSDs'], df['Gauss'] = msd_calc(df) |
||
159 | >>> gyration_tensor(df) |
||
160 | (0.53232560128104522, |
||
161 | 0.42766829138901619, |
||
162 | array([ 0.6020119 , -0.79848711]), |
||
163 | array([-0.79848711, -0.6020119 ])) |
||
164 | """ |
||
165 | |||
166 | df = track |
||
167 | assert type(df) == pd.core.frame.DataFrame, "track must be a pandas dataframe." |
||
168 | assert type(df['X']) == pd.core.series.Series, "track must contain X column." |
||
169 | assert type(df['Y']) == pd.core.series.Series, "track must contain Y column." |
||
170 | assert df.shape[0] > 0, "track must not be empty." |
||
171 | |||
172 | Ta = np.sum((df['X'] - np.mean(df['X']))**2)/df['X'].shape[0] |
||
173 | Tb = np.sum((df['Y'] - np.mean(df['Y']))**2)/df['Y'].shape[0] |
||
174 | Tab = np.sum((df['X'] - np.mean(df['X']))*(df['Y'] - np.mean(df['Y'])))/df['X'].shape[0] |
||
175 | |||
176 | w, v = LA.eig(np.array([[Ta, Tab], [Tab, Tb]])) |
||
177 | dom = np.argmax(np.abs(w)) |
||
178 | rec = np.argmin(np.abs(w)) |
||
179 | l1 = w[dom] |
||
180 | l2 = w[rec] |
||
181 | v1 = v[dom] |
||
182 | v2 = v[rec] |
||
183 | return l1, l2, v1, v2 |
||
184 | |||
185 | |||
186 | View Code Duplication | def kurtosis(track): |
|
187 | """ |
||
188 | Calculates the kurtosis of input track. |
||
189 | |||
190 | Parameters |
||
191 | ---------- |
||
192 | track : pandas DataFrame |
||
193 | At a minimum, must contain an X and Y column. The function |
||
194 | msd_calc can be used to generate the correctly formatted pd dataframe. |
||
195 | |||
196 | Returns |
||
197 | ------- |
||
198 | kurt : numpy.float64 |
||
199 | Kurtosis of the input track. Calculation based on projected 2D positions |
||
200 | on the dominant eigenvector of the radius of gyration tensor. |
||
201 | |||
202 | Examples |
||
203 | -------- |
||
204 | >>> frames = 5 |
||
205 | >>> d = {'Frame': np.linspace(1, frames, frames), |
||
206 | 'X': np.linspace(1, frames, frames)+5, |
||
207 | 'Y': np.linspace(1, frames, frames)+3} |
||
208 | >>> df = pd.DataFrame(data=d) |
||
209 | >>> df['MSDs'], df['Gauss'] = msd_calc(df) |
||
210 | >>> kurtosis(df) |
||
211 | 2.5147928994082829 |
||
212 | |||
213 | >>> frames = 10 |
||
214 | >>> d = {'Frame': np.linspace(1, frames, frames), |
||
215 | 'X': np.sin(np.linspace(1, frames, frames)+3), |
||
216 | 'Y': np.cos(np.linspace(1, frames, frames)+3)} |
||
217 | >>> df = pd.DataFrame(data=d) |
||
218 | >>> df['MSDs'], df['Gauss'] = msd_calc(df) |
||
219 | >>> kurtosis(df) |
||
220 | 1.8515139698652476 |
||
221 | """ |
||
222 | |||
223 | df = track |
||
224 | assert type(df) == pd.core.frame.DataFrame, "track must be a pandas dataframe." |
||
225 | assert type(df['X']) == pd.core.series.Series, "track must contain X column." |
||
226 | assert type(df['Y']) == pd.core.series.Series, "track must contain Y column." |
||
227 | assert df.shape[0] > 0, "track must not be empty." |
||
228 | |||
229 | l1, l2, v1, v2 = gyration_tensor(df) |
||
230 | projection = df['X']*v1[0] + df['Y']*v1[1] |
||
231 | |||
232 | kurt = np.mean((projection - np.mean(projection))**4/(np.std(projection)**4)) |
||
233 | |||
234 | return kurt |
||
235 | |||
236 | |||
237 | View Code Duplication | def asymmetry(track): |
|
238 | """ |
||
239 | Calculates the asymmetry of the trajectory. |
||
240 | |||
241 | Parameters |
||
242 | ---------- |
||
243 | track : pandas DataFrame |
||
244 | At a minimum, must contain an X and Y column. The function |
||
245 | msd_calc can be used to generate the correctly formatted pd dataframe. |
||
246 | |||
247 | Returns |
||
248 | ------- |
||
249 | l1 : numpy.float64 |
||
250 | Dominant eigenvalue of the gyration tensor. |
||
251 | l2 : numpy.float64 |
||
252 | Secondary eigenvalue of the gyration tensor. |
||
253 | a1 : numpy.float64 |
||
254 | asymmetry of the input track. Equal to 0 for circularly symmetric tracks, |
||
255 | and 1 for linear tracks. |
||
256 | a2 : numpy.float64 |
||
257 | alternate definition of asymmetry. Equal to 1 for circularly |
||
258 | symmetric tracks, and 0 for linear tracks. |
||
259 | a3 : numpy.float64 |
||
260 | alternate definition of asymmetry. |
||
261 | |||
262 | Examples |
||
263 | -------- |
||
264 | >>> frames = 10 |
||
265 | >>> d = {'Frame': np.linspace(1, frames, frames), |
||
266 | 'X': np.linspace(1, frames, frames)+5, |
||
267 | 'Y': np.linspace(1, frames, frames)+3} |
||
268 | >>> df = pd.DataFrame(data=d) |
||
269 | >>> df['MSDs'], df['Gauss'] = msd_calc(df) |
||
270 | >>> asymmetry(df) |
||
271 | (16.5, 0.0, 1.0, 0.0, 0.69314718055994529) |
||
272 | |||
273 | >>> frames = 10 |
||
274 | >>> d = {'Frame': np.linspace(1, frames, frames), |
||
275 | 'X': np.sin(np.linspace(1, frames, frames)+3), |
||
276 | 'Y': np.cos(np.linspace(1, frames, frames)+3)} |
||
277 | >>> df = pd.DataFrame(data=d) |
||
278 | >>> df['MSDs'], df['Gauss'] = msd_calc(df) |
||
279 | >>> asymmetry(df) |
||
280 | (0.53232560128104522, |
||
281 | 0.42766829138901619, |
||
282 | 0.046430119259539708, |
||
283 | 0.80339606128247354, |
||
284 | 0.0059602683290953052) |
||
285 | """ |
||
286 | |||
287 | assert type(track) == pd.core.frame.DataFrame, "track must be a pandas dataframe." |
||
288 | assert type(track['X']) == pd.core.series.Series, "track must contain X column." |
||
289 | assert type(track['Y']) == pd.core.series.Series, "track must contain Y column." |
||
290 | assert track.shape[0] > 0, "track must not be empty." |
||
291 | |||
292 | l1, l2, v1, v2 = gyration_tensor(track) |
||
293 | a1 = (l1**2 - l2**2)**2/(l1**2 + l2**2)**2 |
||
294 | a2 = l2/l1 |
||
295 | a3 = -np.log(1-((l1-l2)**2)/(2*(l1+l2)**2)) |
||
296 | |||
297 | return l1, l2, a1, a2, a3 |
||
298 | |||
299 | |||
300 | View Code Duplication | def minBoundingRect(df): |
|
301 | """ |
||
302 | Calculates the minimum bounding rectangle of an input trajectory. |
||
303 | |||
304 | Parameters |
||
305 | ---------- |
||
306 | df : pandas DataFrame |
||
307 | At a minimum, must contain an X and Y column. The function |
||
308 | msd_calc can be used to generate the correctly formatted pd dataframe. |
||
309 | |||
310 | Returns |
||
311 | ------- |
||
312 | rot_angle : numpy.float64 |
||
313 | Angle of rotation of the bounding box. |
||
314 | area : numpy.float64 |
||
315 | Area of the bounding box. |
||
316 | width : numpy.float64 |
||
317 | Width of the bounding box. |
||
318 | height : numpy.float64 |
||
319 | Height of the bounding box. |
||
320 | center_point : 2 x 1 numpy.ndarray of numpy.float64 objects |
||
321 | Center point of the bounding box. |
||
322 | corner_points : 4 x 2 numpy.ndarray of numpy.float64 objects |
||
323 | Corner points of the bounding box. |
||
324 | |||
325 | Examples |
||
326 | -------- |
||
327 | >>> frames = 10 |
||
328 | >>> d = {'Frame': np.linspace(1, frames, frames), |
||
329 | 'X': np.linspace(1, frames, frames)+5, |
||
330 | 'Y': np.linspace(1, frames, frames)+3} |
||
331 | >>> df = pd.DataFrame(data=d) |
||
332 | >>> df['MSDs'], df['Gauss'] = msd_calc(df) |
||
333 | >>> minBoundingRect(df) |
||
334 | (-2.3561944901923448, |
||
335 | 2.8261664256307952e-14, |
||
336 | 12.727922061357855, |
||
337 | 2.2204460492503131e-15, |
||
338 | array([ 10.5, 8.5]), |
||
339 | array([[ 6., 4.], |
||
340 | [ 15., 13.], |
||
341 | [ 15., 13.], |
||
342 | [ 6., 4.]])) |
||
343 | |||
344 | >>> frames = 10 |
||
345 | >>> d = {'Frame': np.linspace(1, frames, frames), |
||
346 | 'X': np.sin(np.linspace(1, frames, frames))+3, |
||
347 | 'Y': np.cos(np.linspace(1, frames, frames))+3} |
||
348 | >>> df = pd.DataFrame(data=d) |
||
349 | >>> df['MSDs'], df['Gauss'] = msd_calc(df) |
||
350 | >>> minBoundingRect(df) |
||
351 | (0.78318530717958657, |
||
352 | 3.6189901131223992, |
||
353 | 1.9949899732081091, |
||
354 | 1.8140392491811692, |
||
355 | array([ 3.02076903, 2.97913884]), |
||
356 | array([[ 4.3676025 , 3.04013439], |
||
357 | [ 2.95381341, 1.63258851], |
||
358 | [ 1.67393557, 2.9181433 ], |
||
359 | [ 3.08772466, 4.32568917]])) |
||
360 | |||
361 | Notes |
||
362 | ----- |
||
363 | Based off of code from the following repo: |
||
364 | https://github.com/dbworth/minimum-area-bounding-rectangle/blob/master/python/min_bounding_rect.py |
||
365 | """ |
||
366 | |||
367 | assert type(df) == pd.core.frame.DataFrame, "track must be a pandas dataframe." |
||
368 | assert type(df['X']) == pd.core.series.Series, "track must contain X column." |
||
369 | assert type(df['Y']) == pd.core.series.Series, "track must contain Y column." |
||
370 | assert df.shape[0] > 0, "track must not be empty." |
||
371 | |||
372 | df2 = np.zeros((df.shape[0]+1, 2)) |
||
373 | df2[:-1, :] = df[['X', 'Y']].values |
||
374 | df2[-1, :] = df[['X', 'Y']].values[0, :] |
||
375 | hull_points_2d = df2 |
||
376 | |||
377 | edges = np.zeros((len(hull_points_2d)-1, 2)) |
||
378 | |||
379 | for i in range(len(edges)): |
||
380 | edge_x = hull_points_2d[i+1, 0] - hull_points_2d[i, 0] |
||
381 | edge_y = hull_points_2d[i+1, 1] - hull_points_2d[i, 1] |
||
382 | edges[i] = [edge_x, edge_y] |
||
383 | |||
384 | edge_angles = np.zeros((len(edges))) |
||
385 | |||
386 | for i in range(len(edge_angles)): |
||
387 | edge_angles[i] = math.atan2(edges[i, 1], edges[i, 0]) |
||
388 | edge_angles = np.unique(edge_angles) |
||
389 | |||
390 | start_area = platform_c_maxint = 2 ** (struct.Struct('i').size * 8 - 1) - 1 |
||
391 | min_bbox = (0, start_area, 0, 0, 0, 0, 0, 0) |
||
392 | for i in range(len(edge_angles)): |
||
393 | R = np.array([[math.cos(edge_angles[i]), math.cos(edge_angles[i]-(math.pi/2))], |
||
394 | [math.cos(edge_angles[i]+(math.pi/2)), math.cos(edge_angles[i])]]) |
||
395 | |||
396 | rot_points = np.dot(R, np.transpose(hull_points_2d)) |
||
397 | |||
398 | min_x = np.nanmin(rot_points[0], axis=0) |
||
399 | max_x = np.nanmax(rot_points[0], axis=0) |
||
400 | min_y = np.nanmin(rot_points[1], axis=0) |
||
401 | max_y = np.nanmax(rot_points[1], axis=0) |
||
402 | |||
403 | width = max_x - min_x |
||
404 | height = max_y - min_y |
||
405 | area = width*height |
||
406 | |||
407 | if (area < min_bbox[1]): |
||
408 | min_bbox = (edge_angles[i], area, width, height, min_x, max_x, min_y, max_y) |
||
409 | |||
410 | angle = min_bbox[0] |
||
411 | R = np.array([[math.cos(angle), math.cos(angle-(math.pi/2))], [math.cos(angle+(math.pi/2)), math.cos(angle)]]) |
||
412 | proj_points = np.dot(R, np.transpose(hull_points_2d)) |
||
413 | |||
414 | min_x = min_bbox[4] |
||
415 | max_x = min_bbox[5] |
||
416 | min_y = min_bbox[6] |
||
417 | max_y = min_bbox[7] |
||
418 | |||
419 | center_x = (min_x + max_x)/2 |
||
420 | center_y = (min_y + max_y)/2 |
||
421 | center_point = np.dot([center_x, center_y], R) |
||
422 | |||
423 | corner_points = np.zeros((4, 2)) |
||
424 | corner_points[0] = np.dot([max_x, min_y], R) |
||
425 | corner_points[1] = np.dot([min_x, min_y], R) |
||
426 | corner_points[2] = np.dot([min_x, max_y], R) |
||
427 | corner_points[3] = np.dot([max_x, max_y], R) |
||
428 | |||
429 | return (angle, min_bbox[1], min_bbox[2], min_bbox[3], center_point, corner_points) |
||
430 | |||
431 | |||
432 | View Code Duplication | def aspectratio(track): |
|
433 | """ |
||
434 | Calculates the aspect ratio of the rectangle containing the input track. |
||
435 | |||
436 | Parameters |
||
437 | ---------- |
||
438 | track : pandas DataFrame |
||
439 | At a minimum, must contain an X and Y column. The function |
||
440 | msd_calc can be used to generate the correctly formatted pd dataframe. |
||
441 | |||
442 | Returns |
||
443 | ------- |
||
444 | ar : numpy.float64 |
||
445 | aspect ratio of the trajectory. Always >= 1. |
||
446 | elong : numpy.float64 |
||
447 | elongation of the trajectory. A transformation of the aspect ratio given |
||
448 | by 1 - ar**-1. |
||
449 | |||
450 | Examples |
||
451 | -------- |
||
452 | >>> frames = 10 |
||
453 | >>> d = {'Frame': np.linspace(1, frames, frames), |
||
454 | 'X': np.linspace(1, frames, frames)+5, |
||
455 | 'Y': np.linspace(1, frames, frames)+3} |
||
456 | >>> df = pd.DataFrame(data=d) |
||
457 | >>> df['MSDs'], df['Gauss'] = msd_calc(df) |
||
458 | >>> aspectratio(df) |
||
459 | (5732146505273195.0, 0.99999999999999978) |
||
460 | |||
461 | >>> frames = 10 |
||
462 | >>> d = {'Frame': np.linspace(1, frames, frames), |
||
463 | 'X': np.sin(np.linspace(1, frames, frames))+3, |
||
464 | 'Y': np.cos(np.linspace(1, frames, frames))+3} |
||
465 | >>> df = pd.DataFrame(data=d) |
||
466 | >>> df['MSDs'], df['Gauss'] = msd_calc(df) |
||
467 | >>> aspectratio(df) |
||
468 | (1.0997501702946164, 0.090702573174318291) |
||
469 | |||
470 | """ |
||
471 | |||
472 | assert type(track) == pd.core.frame.DataFrame, "track must be a pandas dataframe." |
||
473 | assert type(track['X']) == pd.core.series.Series, "track must contain X column." |
||
474 | assert type(track['Y']) == pd.core.series.Series, "track must contain Y column." |
||
475 | assert track.shape[0] > 0, "track must not be empty." |
||
476 | |||
477 | rot_angle, area, width, height, center_point, corner_points = minBoundingRect(track) |
||
478 | ar = width/height |
||
479 | if ar > 1: |
||
480 | counter = 1 |
||
481 | else: |
||
482 | ar = 1/ar |
||
483 | elong = 1 - (1/ar) |
||
484 | |||
485 | return ar, elong, center_point |
||
486 | |||
487 | |||
488 | View Code Duplication | def boundedness(track, framerate=1): |
|
489 | """ |
||
490 | Calculates the boundedness, fractal dimension, and trappedness of the input track. |
||
491 | |||
492 | Parameters |
||
493 | ---------- |
||
494 | track : pandas DataFrame |
||
495 | At a minimum, must contain a Frames and a MSDs column. The function |
||
496 | msd_calc can be used to generate the correctly formatted pd dataframe. |
||
497 | framerate : framrate of the video being analyzed. Actually cancels out. So |
||
498 | why did I include this. Default is 1. |
||
499 | |||
500 | Returns |
||
501 | ------- |
||
502 | B : numpy.float64 |
||
503 | Boundedness of the input track. Quantifies how much a particle with |
||
504 | diffusion coefficient D is restricted by a circular confinement of radius |
||
505 | r when it diffuses for a time duration N*delt. Defined as B = D*N*delt/r**2. |
||
506 | For this case, D is the short time diffusion coefficient (after 2 frames), |
||
507 | and r is half the maximum distance between any two positions. |
||
508 | Df : numpy.float64 |
||
509 | The fractal path dimension defined as Df = log(N)/log(N*d*l**-1) where L |
||
510 | is the total length (sum over all steplengths), N is the number of steps, |
||
511 | and d is the largest distance between any two positions. |
||
512 | pf : numpy.float64 |
||
513 | The probability that a particle with diffusion coefficient D and traced |
||
514 | for a period of time N*delt is trapped in region r0. Given by |
||
515 | pt = 1 - exp(0.2048 - 0.25117*(D*N*delt/r0**2)) |
||
516 | For this case, D is the short time diffusion coefficient, and r0 is half |
||
517 | the maximum distance between any two positions. |
||
518 | |||
519 | Examples |
||
520 | -------- |
||
521 | >>> frames = 10 |
||
522 | >>> d = {'Frame': np.linspace(1, frames, frames), |
||
523 | 'X': np.linspace(1, frames, frames)+5, |
||
524 | 'Y': np.linspace(1, frames, frames)+3} |
||
525 | >>> df = pd.DataFrame(data=d) |
||
526 | >>> df['MSDs'], df['Gauss'] = msd_calc(df) |
||
527 | >>> boundedness(df) |
||
528 | (1.0, 1.0000000000000002, 0.045311337970735499) |
||
529 | |||
530 | >>> frames = 10 |
||
531 | >>> d = {'Frame': np.linspace(1, frames, frames), |
||
532 | 'X': np.sin(np.linspace(1, frames, frames)+3), |
||
533 | 'Y': np.cos(np.linspace(1, frames, frames)+3)} |
||
534 | >>> df = pd.DataFrame(data=d) |
||
535 | >>> df['MSDs'], df['Gauss'] = msd_calc(df) |
||
536 | >>> boundedness(df) |
||
537 | (0.96037058689895005, 2.9989749477908401, 0.03576118370932313) |
||
538 | """ |
||
539 | |||
540 | assert type(track) == pd.core.frame.DataFrame, "track must be a pandas dataframe." |
||
541 | assert type(track['MSDs']) == pd.core.series.Series, "track must contain MSDs column." |
||
542 | assert type(track['Frame']) == pd.core.series.Series, "track must contain Frame column." |
||
543 | assert track.shape[0] > 0, "track must not be empty." |
||
544 | |||
545 | df = track |
||
546 | |||
547 | if df.shape[0] > 2: |
||
548 | length = df.shape[0] |
||
549 | distance = np.zeros((length, length)) |
||
550 | |||
551 | for frame in range(0, length-1): |
||
552 | distance[frame, 0:length-frame-1] = (np.sqrt(msd.nth_diff(df['X'], frame+1)**2 + msd.nth_diff(df['Y'], frame+1)**2).values) |
||
553 | |||
554 | L = np.sum((np.sqrt(msd.nth_diff(df['X'], 1)**2 + msd.nth_diff(df['Y'], 1)**2).values)) |
||
555 | r = np.max(distance)/2 |
||
556 | N = df['Frame'][df['Frame'].shape[0]-1] |
||
557 | f = N*framerate |
||
558 | D = df['MSDs'][2]/(4*f) |
||
559 | |||
560 | B = D*f/(r**2) |
||
561 | Df = np.log(N)/np.log(N*2*r/L) |
||
562 | pf = 1 - np.exp(0.2048 - 0.25117*(D*f/(r**2))) |
||
563 | else: |
||
564 | B = np.nan |
||
565 | Df = np.nan |
||
566 | pf = np.nan |
||
567 | |||
568 | return B, Df, pf |
||
569 | |||
570 | |||
571 | View Code Duplication | def efficiency(track): |
|
572 | """ |
||
573 | Calculates the efficiency and straitness of the input track |
||
574 | |||
575 | Parameters |
||
576 | ---------- |
||
577 | track : pandas DataFrame |
||
578 | At a minimum, must contain a Frames and a MSDs column. The function |
||
579 | msd_calc can be used to generate the correctly formatted pd dataframe. |
||
580 | |||
581 | Returns |
||
582 | ------- |
||
583 | eff : numpy.float64 |
||
584 | Efficiency of the input track. Relates the sum of squared step |
||
585 | lengths. Based on Helmuth et al. (2007) and defined as: |
||
586 | E = |x(N-1)-x(0)|**2/SUM(|x(i) - x(i-1)|**2 |
||
587 | strait : numpy.float64 |
||
588 | Relates the net displacement L to teh sum of step lengths and is |
||
589 | defined as: |
||
590 | S = |x(N-1)-x(0)|/SUM(|x(i) - x(i-1)| |
||
591 | |||
592 | Examples |
||
593 | -------- |
||
594 | >>> frames = 10 |
||
595 | >>> d = {'Frame': np.linspace(1, frames, frames), |
||
596 | 'X': np.linspace(1, frames, frames)+5, |
||
597 | 'Y': np.linspace(1, frames, frames)+3} |
||
598 | >>> df = pd.DataFrame(data=d) |
||
599 | >>> df['MSDs'], df['Gauss'] = msd_calc(df) |
||
600 | >>> ft.efficiency(df) |
||
601 | (9.0, 0.9999999999999999) |
||
602 | |||
603 | >>> frames = 10 |
||
604 | >>> d = {'Frame': np.linspace(1, frames, frames), |
||
605 | 'X': np.sin(np.linspace(1, frames, frames))+3, |
||
606 | 'Y': np.cos(np.linspace(1, frames, frames))+3} |
||
607 | >>> df = pd.DataFrame(data=d) |
||
608 | >>> df['MSDs'], df['Gauss'] = msd_calc(df) |
||
609 | >>> ft.efficiency(df) |
||
610 | (0.46192924086141945, 0.22655125514290225) |
||
611 | """ |
||
612 | |||
613 | df = track |
||
614 | length = df.shape[0] |
||
615 | num = (msd.nth_diff(df['X'], length-1)**2 + msd.nth_diff(df['Y'], length-1)**2)[0] |
||
616 | num2 = np.sqrt(num) |
||
617 | |||
618 | den = np.sum(msd.nth_diff(df['X'], 1)**2 + msd.nth_diff(df['Y'], 1)**2) |
||
619 | den2 = np.sum(np.sqrt(msd.nth_diff(df['X'], 1)**2 + msd.nth_diff(df['Y'], 1)**2)) |
||
620 | |||
621 | eff = num/den |
||
622 | strait = num2/den2 |
||
623 | return eff, strait |
||
624 | |||
625 | |||
626 | View Code Duplication | def msd_ratio(track, n1=3, n2=100): |
|
627 | """ |
||
628 | Calculates the MSD ratio of the input track at the specified frames. |
||
629 | |||
630 | Parameters |
||
631 | ---------- |
||
632 | track : pandas DataFrame |
||
633 | At a minimum, must contain a Frames and a MSDs column. The function |
||
634 | msd_calc can be used to generate the correctly formatted pd dataframe. |
||
635 | n1 : int |
||
636 | First frame at which to calculate the MSD ratio. |
||
637 | n2 : int |
||
638 | Last frame at which to calculate the MSD ratio. |
||
639 | |||
640 | Returns |
||
641 | ------- |
||
642 | ratio: numpy.float64 |
||
643 | MSD ratio as defined by |
||
644 | [MSD(n1)/MSD(n2)] - [n1/n2] |
||
645 | where n1 < n2. For Brownian motion, it is 0; for restricted motion it |
||
646 | is < 0. For directed motion it is > 0. |
||
647 | |||
648 | Examples |
||
649 | -------- |
||
650 | >>> frames = 10 |
||
651 | >>> d = {'Frame': np.linspace(1, frames, frames), |
||
652 | 'X': np.linspace(1, frames, frames)+5, |
||
653 | 'Y': np.linspace(1, frames, frames)+3} |
||
654 | >>> df = pd.DataFrame(data=d) |
||
655 | >>> df['MSDs'], df['Gauss'] = msd_calc(df) |
||
656 | >>> ft.msd_ratio(df, 1, 9) |
||
657 | -0.18765432098765433 |
||
658 | |||
659 | >>> frames = 10 |
||
660 | >>> d = {'Frame': np.linspace(1, frames, frames), |
||
661 | 'X': np.sin(np.linspace(1, frames, frames))+3, |
||
662 | 'Y': np.cos(np.linspace(1, frames, frames))+3} |
||
663 | >>> df = pd.DataFrame(data=d) |
||
664 | >>> df['MSDs'], df['Gauss'] = msd_calc(df) |
||
665 | >>> ft.msd_ratio(df, 1, 9) |
||
666 | 0.04053708075268797 |
||
667 | """ |
||
668 | |||
669 | df = track |
||
670 | assert n1 < n2, "n1 must be less than n2" |
||
671 | ratio = (df['MSDs'][n1]/df['MSDs'][n2]) - (df['Frame'][n1]/df['Frame'][n2]) |
||
672 | return ratio |
||
673 | |||
674 | |||
675 | View Code Duplication | def calculate_features(df, framerate=1): |
|
676 | """ |
||
677 | Calculates multiple features from input MSD dataset and stores in pandas dataframe. |
||
678 | |||
679 | Parameters |
||
680 | ---------- |
||
681 | df : pandas dataframe |
||
682 | Output from msd.all_msds2. Must have at a minimum the following columns: |
||
683 | Track_ID, Frame, X, Y, and MSDs. |
||
684 | framerate : int or float64 |
||
685 | Framerate of the input videos from which trajectories were calculated. Required |
||
686 | for accurate calculation of some features. Default is 1. Possibly not required. |
||
687 | Ignore if performing all calcuations without units. |
||
688 | |||
689 | Returns |
||
690 | ------- |
||
691 | di: pandas dataframe |
||
692 | Contains a row for each trajectory in df. Holds the following features of each |
||
693 | trajetory: Track_ID, alpha, D_fit, kurtosis, asymmetry1, asymmetry2, asymmetry3, |
||
694 | aspect ratio (AR), elongation, boundedness, fractal dimension (fractal_dim), |
||
695 | trappedness, efficiency, straightness, MSD ratio, frames, X, and Y. |
||
696 | |||
697 | Examples |
||
698 | -------- |
||
699 | See example outputs from individual feature functions. |
||
700 | """ |
||
701 | # Skeleton of Trajectory features metadata table. |
||
702 | # Builds entry for each unique Track ID. |
||
703 | holder = df.Track_ID.unique().astype(float) |
||
704 | die = {'Track_ID': holder, |
||
705 | 'alpha': holder, |
||
706 | 'D_fit': holder, |
||
707 | 'kurtosis': holder, |
||
708 | 'asymmetry1': holder, |
||
709 | 'asymmetry2': holder, |
||
710 | 'asymmetry3': holder, |
||
711 | 'AR': holder, |
||
712 | 'elongation': holder, |
||
713 | 'boundedness': holder, |
||
714 | 'fractal_dim': holder, |
||
715 | 'trappedness': holder, |
||
716 | 'efficiency': holder, |
||
717 | 'straightness': holder, |
||
718 | 'MSD_ratio': holder, |
||
719 | 'frames': holder, |
||
720 | 'X': holder, |
||
721 | 'Y': holder} |
||
722 | |||
723 | di = pd.DataFrame(data=die) |
||
724 | |||
725 | trackids = df.Track_ID.unique() |
||
726 | partcount = trackids.shape[0] |
||
727 | |||
728 | for particle in range(0, partcount): |
||
729 | single_track_masked = df.loc[df['Track_ID'] == trackids[particle]].sort_values(['Track_ID', 'Frame'], |
||
730 | ascending=[1, 1]).reset_index(drop=True) |
||
731 | single_track = unmask_track(single_track_masked) |
||
732 | di['alpha'][particle], di['D_fit'][particle] = alpha_calc(single_track) |
||
733 | di['kurtosis'][particle] = kurtosis(single_track) |
||
734 | l1, l2, di['asymmetry1'][particle], di['asymmetry2'][particle], di['asymmetry3'][particle] = asymmetry(single_track) |
||
735 | di['AR'][particle], di['elongation'][particle], (di['X'][particle], di['Y'][particle]) = aspectratio(single_track) |
||
736 | di['boundedness'][particle], di['fractal_dim'][particle], di['trappedness'][particle] = boundedness(single_track, framerate) |
||
737 | di['efficiency'][particle], di['straightness'][particle] = efficiency(single_track) |
||
738 | di['frames'][particle] = single_track.shape[0] |
||
739 | if single_track['Frame'][single_track.shape[0]-2] > 2: |
||
740 | di['MSD_ratio'][particle] = msd_ratio(single_track, 2, single_track['Frame'][single_track.shape[0]-2]) |
||
741 | else: |
||
742 | di['MSD_ratio'][particle] = 0 |
||
743 | |||
744 | return di |
||
745 |
The coding style of this project requires that you add a docstring to this code element. Below, you find an example for methods:
If you would like to know more about docstrings, we recommend to read PEP-257: Docstring Conventions.