| Total Complexity | 46 |
| Total Lines | 745 |
| Duplicated Lines | 95.44 % |
| Changes | 0 | ||
Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems, and corresponding solutions are:
Complex classes like diff_classifier.features often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
| 1 | import numpy as np |
||
|
|
|||
| 2 | import pandas as pd |
||
| 3 | import numpy.ma as ma |
||
| 4 | from scipy.optimize import curve_fit |
||
| 5 | import numpy.linalg as LA |
||
| 6 | import math |
||
| 7 | import struct |
||
| 8 | import sys |
||
| 9 | |||
| 10 | import diff_classifier.utils as ut |
||
| 11 | import diff_classifier.msd as msd |
||
| 12 | |||
| 13 | |||
| 14 | View Code Duplication | def unmask_track(track): |
|
| 15 | """ |
||
| 16 | Removes empty frames from a track in an MSD pandas dataframe. |
||
| 17 | |||
| 18 | Parameters |
||
| 19 | ---------- |
||
| 20 | track : pandas Dataframe |
||
| 21 | At a minimum, must contain a Frame, Track_ID, X, Y, MSDs, and |
||
| 22 | Gauss column. |
||
| 23 | |||
| 24 | Returns |
||
| 25 | ------- |
||
| 26 | comp_track : pandas Dataframe |
||
| 27 | Similar to track, but has all masked components removed. |
||
| 28 | |||
| 29 | Examples |
||
| 30 | -------- |
||
| 31 | |||
| 32 | """ |
||
| 33 | x = ma.masked_invalid(track['X']) |
||
| 34 | msd = ma.masked_invalid(track['MSDs']) |
||
| 35 | x_mask = ma.getmask(x) |
||
| 36 | msd_mask = ma.getmask(msd) |
||
| 37 | comp_frame = ma.compressed(ma.masked_where(msd_mask, track['Frame'])) |
||
| 38 | comp_ID = ma.compressed(ma.masked_where(msd_mask, track['Track_ID'])) |
||
| 39 | comp_x = ma.compressed(ma.masked_where(x_mask, track['X'])) |
||
| 40 | comp_y = ma.compressed(ma.masked_where(x_mask, track['Y'])) |
||
| 41 | comp_msd = ma.compressed(ma.masked_where(msd_mask, track['MSDs'])) |
||
| 42 | comp_gauss = ma.compressed(ma.masked_where(msd_mask, track['Gauss'])) |
||
| 43 | |||
| 44 | d = {'Frame': comp_frame, |
||
| 45 | 'Track_ID': comp_ID, |
||
| 46 | 'X': comp_x, |
||
| 47 | 'Y': comp_y, |
||
| 48 | 'MSDs': comp_msd, |
||
| 49 | 'Gauss': comp_gauss |
||
| 50 | } |
||
| 51 | comp_track = pd.DataFrame(data=d) |
||
| 52 | return comp_track |
||
| 53 | |||
| 54 | |||
| 55 | View Code Duplication | def alpha_calc(track): |
|
| 56 | """ |
||
| 57 | Calculates the parameter alpha by fitting track MSD data to a function. |
||
| 58 | |||
| 59 | Parameters |
||
| 60 | ---------- |
||
| 61 | track : pandas DataFrame |
||
| 62 | At a minimum, must contain a Frames and a MSDs column. The function |
||
| 63 | msd_calc can be used to generate the correctly formatted pd dataframe. |
||
| 64 | |||
| 65 | Returns |
||
| 66 | ------- |
||
| 67 | a : numpy.float64 |
||
| 68 | The anomalous exponent derived by fitting MSD values to the function, |
||
| 69 | <r**2(n)> = 4*D*(n*delt)**a |
||
| 70 | D : numpy.float64 |
||
| 71 | The fitted diffusion coefficient derived by fitting MSD values to the |
||
| 72 | function above. |
||
| 73 | |||
| 74 | Examples |
||
| 75 | -------- |
||
| 76 | >>> frames = 5 |
||
| 77 | >>> d = {'Frame': np.linspace(1, frames, frames), |
||
| 78 | 'X': np.linspace(1, frames, frames)+5, |
||
| 79 | 'Y': np.linspace(1, frames, frames)+3} |
||
| 80 | >>> df = pd.DataFrame(data=d) |
||
| 81 | >>> df['MSDs'], df['Gauss'] = msd_calc(df) |
||
| 82 | >>> alpha_calc(df) |
||
| 83 | (2.0000000000000004, 0.4999999999999999) |
||
| 84 | |||
| 85 | >>> frames = 10 |
||
| 86 | >>> d = {'Frame': np.linspace(1, frames, frames), |
||
| 87 | 'X': np.sin(np.linspace(1, frames, frames)+3), |
||
| 88 | 'Y': np.cos(np.linspace(1, frames, frames)+3)} |
||
| 89 | >>> df = pd.DataFrame(data=d) |
||
| 90 | >>> df['MSDs'], df['Gauss'] = msd_calc(df) |
||
| 91 | >>> alpha_calc(df) |
||
| 92 | (0.023690002018364065, 0.5144436515510022) |
||
| 93 | """ |
||
| 94 | |||
| 95 | # assert type(track) == pd.core.frame.DataFrame, "track must be a pandas dataframe." |
||
| 96 | # assert type(track['MSDs']) == pd.core.series.Series, "track must contain MSDs column." |
||
| 97 | # assert type(track['Frame']) == pd.core.series.Series, "track must contain Frame column." |
||
| 98 | # assert track.shape[0] > 0, "track must not be empty." |
||
| 99 | |||
| 100 | y = track['MSDs'] |
||
| 101 | x = track['Frame'] |
||
| 102 | |||
| 103 | def msd_alpha(x, a, D): |
||
| 104 | return 4*D*(x**a) |
||
| 105 | |||
| 106 | try: |
||
| 107 | popt, pcov = curve_fit(msd_alpha, x, y) |
||
| 108 | a = popt[0] |
||
| 109 | D = popt[1] |
||
| 110 | except RuntimeError: |
||
| 111 | print('Optimal parameters not found. Print NaN instead.') |
||
| 112 | a = np.nan |
||
| 113 | D = np.nan |
||
| 114 | return a, D |
||
| 115 | |||
| 116 | |||
| 117 | View Code Duplication | def gyration_tensor(track): |
|
| 118 | """ |
||
| 119 | Calculates the eigenvalues and eigenvectors of the gyration tensor of the |
||
| 120 | input trajectory. |
||
| 121 | |||
| 122 | Parameters |
||
| 123 | ---------- |
||
| 124 | track : pandas DataFrame |
||
| 125 | At a minimum, must contain an X and Y column. The function |
||
| 126 | msd_calc can be used to generate the correctly formatted pd dataframe. |
||
| 127 | |||
| 128 | Returns |
||
| 129 | ------- |
||
| 130 | l1 : numpy.float64 |
||
| 131 | Dominant eigenvalue of the gyration tensor. |
||
| 132 | l2 : numpy.float64 |
||
| 133 | Secondary eigenvalue of the gyration tensor. |
||
| 134 | v1 : 2 x 1 numpy.ndarray of numpy.float64 objects |
||
| 135 | Dominant eigenvector of the gyration tensor. |
||
| 136 | v2 : 2 x 1 numpy.ndarray of numpy.float64 objects |
||
| 137 | Secondary eigenvector of the gyration tensor. |
||
| 138 | |||
| 139 | Examples |
||
| 140 | -------- |
||
| 141 | >>> frames = 5 |
||
| 142 | >>> d = {'Frame': np.linspace(1, frames, frames), |
||
| 143 | 'X': np.linspace(1, frames, frames)+5, |
||
| 144 | 'Y': np.linspace(1, frames, frames)+3} |
||
| 145 | >>> df = pd.DataFrame(data=d) |
||
| 146 | >>> df['MSDs'], df['Gauss'] = msd_calc(df) |
||
| 147 | >>> gyration_tensor(df) |
||
| 148 | (4.0, |
||
| 149 | 4.4408920985006262e-16, |
||
| 150 | array([ 0.70710678, -0.70710678]), |
||
| 151 | array([ 0.70710678, 0.70710678])) |
||
| 152 | |||
| 153 | >>> frames = 10 |
||
| 154 | >>> d = {'Frame': np.linspace(1, frames, frames), |
||
| 155 | 'X': np.sin(np.linspace(1, frames, frames)+3), |
||
| 156 | 'Y': np.cos(np.linspace(1, frames, frames)+3)} |
||
| 157 | >>> df = pd.DataFrame(data=d) |
||
| 158 | >>> df['MSDs'], df['Gauss'] = msd_calc(df) |
||
| 159 | >>> gyration_tensor(df) |
||
| 160 | (0.53232560128104522, |
||
| 161 | 0.42766829138901619, |
||
| 162 | array([ 0.6020119 , -0.79848711]), |
||
| 163 | array([-0.79848711, -0.6020119 ])) |
||
| 164 | """ |
||
| 165 | |||
| 166 | df = track |
||
| 167 | assert type(df) == pd.core.frame.DataFrame, "track must be a pandas dataframe." |
||
| 168 | assert type(df['X']) == pd.core.series.Series, "track must contain X column." |
||
| 169 | assert type(df['Y']) == pd.core.series.Series, "track must contain Y column." |
||
| 170 | assert df.shape[0] > 0, "track must not be empty." |
||
| 171 | |||
| 172 | Ta = np.sum((df['X'] - np.mean(df['X']))**2)/df['X'].shape[0] |
||
| 173 | Tb = np.sum((df['Y'] - np.mean(df['Y']))**2)/df['Y'].shape[0] |
||
| 174 | Tab = np.sum((df['X'] - np.mean(df['X']))*(df['Y'] - np.mean(df['Y'])))/df['X'].shape[0] |
||
| 175 | |||
| 176 | w, v = LA.eig(np.array([[Ta, Tab], [Tab, Tb]])) |
||
| 177 | dom = np.argmax(np.abs(w)) |
||
| 178 | rec = np.argmin(np.abs(w)) |
||
| 179 | l1 = w[dom] |
||
| 180 | l2 = w[rec] |
||
| 181 | v1 = v[dom] |
||
| 182 | v2 = v[rec] |
||
| 183 | return l1, l2, v1, v2 |
||
| 184 | |||
| 185 | |||
| 186 | View Code Duplication | def kurtosis(track): |
|
| 187 | """ |
||
| 188 | Calculates the kurtosis of input track. |
||
| 189 | |||
| 190 | Parameters |
||
| 191 | ---------- |
||
| 192 | track : pandas DataFrame |
||
| 193 | At a minimum, must contain an X and Y column. The function |
||
| 194 | msd_calc can be used to generate the correctly formatted pd dataframe. |
||
| 195 | |||
| 196 | Returns |
||
| 197 | ------- |
||
| 198 | kurt : numpy.float64 |
||
| 199 | Kurtosis of the input track. Calculation based on projected 2D positions |
||
| 200 | on the dominant eigenvector of the radius of gyration tensor. |
||
| 201 | |||
| 202 | Examples |
||
| 203 | -------- |
||
| 204 | >>> frames = 5 |
||
| 205 | >>> d = {'Frame': np.linspace(1, frames, frames), |
||
| 206 | 'X': np.linspace(1, frames, frames)+5, |
||
| 207 | 'Y': np.linspace(1, frames, frames)+3} |
||
| 208 | >>> df = pd.DataFrame(data=d) |
||
| 209 | >>> df['MSDs'], df['Gauss'] = msd_calc(df) |
||
| 210 | >>> kurtosis(df) |
||
| 211 | 2.5147928994082829 |
||
| 212 | |||
| 213 | >>> frames = 10 |
||
| 214 | >>> d = {'Frame': np.linspace(1, frames, frames), |
||
| 215 | 'X': np.sin(np.linspace(1, frames, frames)+3), |
||
| 216 | 'Y': np.cos(np.linspace(1, frames, frames)+3)} |
||
| 217 | >>> df = pd.DataFrame(data=d) |
||
| 218 | >>> df['MSDs'], df['Gauss'] = msd_calc(df) |
||
| 219 | >>> kurtosis(df) |
||
| 220 | 1.8515139698652476 |
||
| 221 | """ |
||
| 222 | |||
| 223 | df = track |
||
| 224 | assert type(df) == pd.core.frame.DataFrame, "track must be a pandas dataframe." |
||
| 225 | assert type(df['X']) == pd.core.series.Series, "track must contain X column." |
||
| 226 | assert type(df['Y']) == pd.core.series.Series, "track must contain Y column." |
||
| 227 | assert df.shape[0] > 0, "track must not be empty." |
||
| 228 | |||
| 229 | l1, l2, v1, v2 = gyration_tensor(df) |
||
| 230 | projection = df['X']*v1[0] + df['Y']*v1[1] |
||
| 231 | |||
| 232 | kurt = np.mean((projection - np.mean(projection))**4/(np.std(projection)**4)) |
||
| 233 | |||
| 234 | return kurt |
||
| 235 | |||
| 236 | |||
| 237 | View Code Duplication | def asymmetry(track): |
|
| 238 | """ |
||
| 239 | Calculates the asymmetry of the trajectory. |
||
| 240 | |||
| 241 | Parameters |
||
| 242 | ---------- |
||
| 243 | track : pandas DataFrame |
||
| 244 | At a minimum, must contain an X and Y column. The function |
||
| 245 | msd_calc can be used to generate the correctly formatted pd dataframe. |
||
| 246 | |||
| 247 | Returns |
||
| 248 | ------- |
||
| 249 | l1 : numpy.float64 |
||
| 250 | Dominant eigenvalue of the gyration tensor. |
||
| 251 | l2 : numpy.float64 |
||
| 252 | Secondary eigenvalue of the gyration tensor. |
||
| 253 | a1 : numpy.float64 |
||
| 254 | asymmetry of the input track. Equal to 0 for circularly symmetric tracks, |
||
| 255 | and 1 for linear tracks. |
||
| 256 | a2 : numpy.float64 |
||
| 257 | alternate definition of asymmetry. Equal to 1 for circularly |
||
| 258 | symmetric tracks, and 0 for linear tracks. |
||
| 259 | a3 : numpy.float64 |
||
| 260 | alternate definition of asymmetry. |
||
| 261 | |||
| 262 | Examples |
||
| 263 | -------- |
||
| 264 | >>> frames = 10 |
||
| 265 | >>> d = {'Frame': np.linspace(1, frames, frames), |
||
| 266 | 'X': np.linspace(1, frames, frames)+5, |
||
| 267 | 'Y': np.linspace(1, frames, frames)+3} |
||
| 268 | >>> df = pd.DataFrame(data=d) |
||
| 269 | >>> df['MSDs'], df['Gauss'] = msd_calc(df) |
||
| 270 | >>> asymmetry(df) |
||
| 271 | (16.5, 0.0, 1.0, 0.0, 0.69314718055994529) |
||
| 272 | |||
| 273 | >>> frames = 10 |
||
| 274 | >>> d = {'Frame': np.linspace(1, frames, frames), |
||
| 275 | 'X': np.sin(np.linspace(1, frames, frames)+3), |
||
| 276 | 'Y': np.cos(np.linspace(1, frames, frames)+3)} |
||
| 277 | >>> df = pd.DataFrame(data=d) |
||
| 278 | >>> df['MSDs'], df['Gauss'] = msd_calc(df) |
||
| 279 | >>> asymmetry(df) |
||
| 280 | (0.53232560128104522, |
||
| 281 | 0.42766829138901619, |
||
| 282 | 0.046430119259539708, |
||
| 283 | 0.80339606128247354, |
||
| 284 | 0.0059602683290953052) |
||
| 285 | """ |
||
| 286 | |||
| 287 | assert type(track) == pd.core.frame.DataFrame, "track must be a pandas dataframe." |
||
| 288 | assert type(track['X']) == pd.core.series.Series, "track must contain X column." |
||
| 289 | assert type(track['Y']) == pd.core.series.Series, "track must contain Y column." |
||
| 290 | assert track.shape[0] > 0, "track must not be empty." |
||
| 291 | |||
| 292 | l1, l2, v1, v2 = gyration_tensor(track) |
||
| 293 | a1 = (l1**2 - l2**2)**2/(l1**2 + l2**2)**2 |
||
| 294 | a2 = l2/l1 |
||
| 295 | a3 = -np.log(1-((l1-l2)**2)/(2*(l1+l2)**2)) |
||
| 296 | |||
| 297 | return l1, l2, a1, a2, a3 |
||
| 298 | |||
| 299 | |||
| 300 | View Code Duplication | def minBoundingRect(df): |
|
| 301 | """ |
||
| 302 | Calculates the minimum bounding rectangle of an input trajectory. |
||
| 303 | |||
| 304 | Parameters |
||
| 305 | ---------- |
||
| 306 | df : pandas DataFrame |
||
| 307 | At a minimum, must contain an X and Y column. The function |
||
| 308 | msd_calc can be used to generate the correctly formatted pd dataframe. |
||
| 309 | |||
| 310 | Returns |
||
| 311 | ------- |
||
| 312 | rot_angle : numpy.float64 |
||
| 313 | Angle of rotation of the bounding box. |
||
| 314 | area : numpy.float64 |
||
| 315 | Area of the bounding box. |
||
| 316 | width : numpy.float64 |
||
| 317 | Width of the bounding box. |
||
| 318 | height : numpy.float64 |
||
| 319 | Height of the bounding box. |
||
| 320 | center_point : 2 x 1 numpy.ndarray of numpy.float64 objects |
||
| 321 | Center point of the bounding box. |
||
| 322 | corner_points : 4 x 2 numpy.ndarray of numpy.float64 objects |
||
| 323 | Corner points of the bounding box. |
||
| 324 | |||
| 325 | Examples |
||
| 326 | -------- |
||
| 327 | >>> frames = 10 |
||
| 328 | >>> d = {'Frame': np.linspace(1, frames, frames), |
||
| 329 | 'X': np.linspace(1, frames, frames)+5, |
||
| 330 | 'Y': np.linspace(1, frames, frames)+3} |
||
| 331 | >>> df = pd.DataFrame(data=d) |
||
| 332 | >>> df['MSDs'], df['Gauss'] = msd_calc(df) |
||
| 333 | >>> minBoundingRect(df) |
||
| 334 | (-2.3561944901923448, |
||
| 335 | 2.8261664256307952e-14, |
||
| 336 | 12.727922061357855, |
||
| 337 | 2.2204460492503131e-15, |
||
| 338 | array([ 10.5, 8.5]), |
||
| 339 | array([[ 6., 4.], |
||
| 340 | [ 15., 13.], |
||
| 341 | [ 15., 13.], |
||
| 342 | [ 6., 4.]])) |
||
| 343 | |||
| 344 | >>> frames = 10 |
||
| 345 | >>> d = {'Frame': np.linspace(1, frames, frames), |
||
| 346 | 'X': np.sin(np.linspace(1, frames, frames))+3, |
||
| 347 | 'Y': np.cos(np.linspace(1, frames, frames))+3} |
||
| 348 | >>> df = pd.DataFrame(data=d) |
||
| 349 | >>> df['MSDs'], df['Gauss'] = msd_calc(df) |
||
| 350 | >>> minBoundingRect(df) |
||
| 351 | (0.78318530717958657, |
||
| 352 | 3.6189901131223992, |
||
| 353 | 1.9949899732081091, |
||
| 354 | 1.8140392491811692, |
||
| 355 | array([ 3.02076903, 2.97913884]), |
||
| 356 | array([[ 4.3676025 , 3.04013439], |
||
| 357 | [ 2.95381341, 1.63258851], |
||
| 358 | [ 1.67393557, 2.9181433 ], |
||
| 359 | [ 3.08772466, 4.32568917]])) |
||
| 360 | |||
| 361 | Notes |
||
| 362 | ----- |
||
| 363 | Based off of code from the following repo: |
||
| 364 | https://github.com/dbworth/minimum-area-bounding-rectangle/blob/master/python/min_bounding_rect.py |
||
| 365 | """ |
||
| 366 | |||
| 367 | assert type(df) == pd.core.frame.DataFrame, "track must be a pandas dataframe." |
||
| 368 | assert type(df['X']) == pd.core.series.Series, "track must contain X column." |
||
| 369 | assert type(df['Y']) == pd.core.series.Series, "track must contain Y column." |
||
| 370 | assert df.shape[0] > 0, "track must not be empty." |
||
| 371 | |||
| 372 | df2 = np.zeros((df.shape[0]+1, 2)) |
||
| 373 | df2[:-1, :] = df[['X', 'Y']].values |
||
| 374 | df2[-1, :] = df[['X', 'Y']].values[0, :] |
||
| 375 | hull_points_2d = df2 |
||
| 376 | |||
| 377 | edges = np.zeros((len(hull_points_2d)-1, 2)) |
||
| 378 | |||
| 379 | for i in range(len(edges)): |
||
| 380 | edge_x = hull_points_2d[i+1, 0] - hull_points_2d[i, 0] |
||
| 381 | edge_y = hull_points_2d[i+1, 1] - hull_points_2d[i, 1] |
||
| 382 | edges[i] = [edge_x, edge_y] |
||
| 383 | |||
| 384 | edge_angles = np.zeros((len(edges))) |
||
| 385 | |||
| 386 | for i in range(len(edge_angles)): |
||
| 387 | edge_angles[i] = math.atan2(edges[i, 1], edges[i, 0]) |
||
| 388 | edge_angles = np.unique(edge_angles) |
||
| 389 | |||
| 390 | start_area = platform_c_maxint = 2 ** (struct.Struct('i').size * 8 - 1) - 1 |
||
| 391 | min_bbox = (0, start_area, 0, 0, 0, 0, 0, 0) |
||
| 392 | for i in range(len(edge_angles)): |
||
| 393 | R = np.array([[math.cos(edge_angles[i]), math.cos(edge_angles[i]-(math.pi/2))], |
||
| 394 | [math.cos(edge_angles[i]+(math.pi/2)), math.cos(edge_angles[i])]]) |
||
| 395 | |||
| 396 | rot_points = np.dot(R, np.transpose(hull_points_2d)) |
||
| 397 | |||
| 398 | min_x = np.nanmin(rot_points[0], axis=0) |
||
| 399 | max_x = np.nanmax(rot_points[0], axis=0) |
||
| 400 | min_y = np.nanmin(rot_points[1], axis=0) |
||
| 401 | max_y = np.nanmax(rot_points[1], axis=0) |
||
| 402 | |||
| 403 | width = max_x - min_x |
||
| 404 | height = max_y - min_y |
||
| 405 | area = width*height |
||
| 406 | |||
| 407 | if (area < min_bbox[1]): |
||
| 408 | min_bbox = (edge_angles[i], area, width, height, min_x, max_x, min_y, max_y) |
||
| 409 | |||
| 410 | angle = min_bbox[0] |
||
| 411 | R = np.array([[math.cos(angle), math.cos(angle-(math.pi/2))], [math.cos(angle+(math.pi/2)), math.cos(angle)]]) |
||
| 412 | proj_points = np.dot(R, np.transpose(hull_points_2d)) |
||
| 413 | |||
| 414 | min_x = min_bbox[4] |
||
| 415 | max_x = min_bbox[5] |
||
| 416 | min_y = min_bbox[6] |
||
| 417 | max_y = min_bbox[7] |
||
| 418 | |||
| 419 | center_x = (min_x + max_x)/2 |
||
| 420 | center_y = (min_y + max_y)/2 |
||
| 421 | center_point = np.dot([center_x, center_y], R) |
||
| 422 | |||
| 423 | corner_points = np.zeros((4, 2)) |
||
| 424 | corner_points[0] = np.dot([max_x, min_y], R) |
||
| 425 | corner_points[1] = np.dot([min_x, min_y], R) |
||
| 426 | corner_points[2] = np.dot([min_x, max_y], R) |
||
| 427 | corner_points[3] = np.dot([max_x, max_y], R) |
||
| 428 | |||
| 429 | return (angle, min_bbox[1], min_bbox[2], min_bbox[3], center_point, corner_points) |
||
| 430 | |||
| 431 | |||
| 432 | View Code Duplication | def aspectratio(track): |
|
| 433 | """ |
||
| 434 | Calculates the aspect ratio of the rectangle containing the input track. |
||
| 435 | |||
| 436 | Parameters |
||
| 437 | ---------- |
||
| 438 | track : pandas DataFrame |
||
| 439 | At a minimum, must contain an X and Y column. The function |
||
| 440 | msd_calc can be used to generate the correctly formatted pd dataframe. |
||
| 441 | |||
| 442 | Returns |
||
| 443 | ------- |
||
| 444 | ar : numpy.float64 |
||
| 445 | aspect ratio of the trajectory. Always >= 1. |
||
| 446 | elong : numpy.float64 |
||
| 447 | elongation of the trajectory. A transformation of the aspect ratio given |
||
| 448 | by 1 - ar**-1. |
||
| 449 | |||
| 450 | Examples |
||
| 451 | -------- |
||
| 452 | >>> frames = 10 |
||
| 453 | >>> d = {'Frame': np.linspace(1, frames, frames), |
||
| 454 | 'X': np.linspace(1, frames, frames)+5, |
||
| 455 | 'Y': np.linspace(1, frames, frames)+3} |
||
| 456 | >>> df = pd.DataFrame(data=d) |
||
| 457 | >>> df['MSDs'], df['Gauss'] = msd_calc(df) |
||
| 458 | >>> aspectratio(df) |
||
| 459 | (5732146505273195.0, 0.99999999999999978) |
||
| 460 | |||
| 461 | >>> frames = 10 |
||
| 462 | >>> d = {'Frame': np.linspace(1, frames, frames), |
||
| 463 | 'X': np.sin(np.linspace(1, frames, frames))+3, |
||
| 464 | 'Y': np.cos(np.linspace(1, frames, frames))+3} |
||
| 465 | >>> df = pd.DataFrame(data=d) |
||
| 466 | >>> df['MSDs'], df['Gauss'] = msd_calc(df) |
||
| 467 | >>> aspectratio(df) |
||
| 468 | (1.0997501702946164, 0.090702573174318291) |
||
| 469 | |||
| 470 | """ |
||
| 471 | |||
| 472 | assert type(track) == pd.core.frame.DataFrame, "track must be a pandas dataframe." |
||
| 473 | assert type(track['X']) == pd.core.series.Series, "track must contain X column." |
||
| 474 | assert type(track['Y']) == pd.core.series.Series, "track must contain Y column." |
||
| 475 | assert track.shape[0] > 0, "track must not be empty." |
||
| 476 | |||
| 477 | rot_angle, area, width, height, center_point, corner_points = minBoundingRect(track) |
||
| 478 | ar = width/height |
||
| 479 | if ar > 1: |
||
| 480 | counter = 1 |
||
| 481 | else: |
||
| 482 | ar = 1/ar |
||
| 483 | elong = 1 - (1/ar) |
||
| 484 | |||
| 485 | return ar, elong, center_point |
||
| 486 | |||
| 487 | |||
| 488 | View Code Duplication | def boundedness(track, framerate=1): |
|
| 489 | """ |
||
| 490 | Calculates the boundedness, fractal dimension, and trappedness of the input track. |
||
| 491 | |||
| 492 | Parameters |
||
| 493 | ---------- |
||
| 494 | track : pandas DataFrame |
||
| 495 | At a minimum, must contain a Frames and a MSDs column. The function |
||
| 496 | msd_calc can be used to generate the correctly formatted pd dataframe. |
||
| 497 | framerate : framrate of the video being analyzed. Actually cancels out. So |
||
| 498 | why did I include this. Default is 1. |
||
| 499 | |||
| 500 | Returns |
||
| 501 | ------- |
||
| 502 | B : numpy.float64 |
||
| 503 | Boundedness of the input track. Quantifies how much a particle with |
||
| 504 | diffusion coefficient D is restricted by a circular confinement of radius |
||
| 505 | r when it diffuses for a time duration N*delt. Defined as B = D*N*delt/r**2. |
||
| 506 | For this case, D is the short time diffusion coefficient (after 2 frames), |
||
| 507 | and r is half the maximum distance between any two positions. |
||
| 508 | Df : numpy.float64 |
||
| 509 | The fractal path dimension defined as Df = log(N)/log(N*d*l**-1) where L |
||
| 510 | is the total length (sum over all steplengths), N is the number of steps, |
||
| 511 | and d is the largest distance between any two positions. |
||
| 512 | pf : numpy.float64 |
||
| 513 | The probability that a particle with diffusion coefficient D and traced |
||
| 514 | for a period of time N*delt is trapped in region r0. Given by |
||
| 515 | pt = 1 - exp(0.2048 - 0.25117*(D*N*delt/r0**2)) |
||
| 516 | For this case, D is the short time diffusion coefficient, and r0 is half |
||
| 517 | the maximum distance between any two positions. |
||
| 518 | |||
| 519 | Examples |
||
| 520 | -------- |
||
| 521 | >>> frames = 10 |
||
| 522 | >>> d = {'Frame': np.linspace(1, frames, frames), |
||
| 523 | 'X': np.linspace(1, frames, frames)+5, |
||
| 524 | 'Y': np.linspace(1, frames, frames)+3} |
||
| 525 | >>> df = pd.DataFrame(data=d) |
||
| 526 | >>> df['MSDs'], df['Gauss'] = msd_calc(df) |
||
| 527 | >>> boundedness(df) |
||
| 528 | (1.0, 1.0000000000000002, 0.045311337970735499) |
||
| 529 | |||
| 530 | >>> frames = 10 |
||
| 531 | >>> d = {'Frame': np.linspace(1, frames, frames), |
||
| 532 | 'X': np.sin(np.linspace(1, frames, frames)+3), |
||
| 533 | 'Y': np.cos(np.linspace(1, frames, frames)+3)} |
||
| 534 | >>> df = pd.DataFrame(data=d) |
||
| 535 | >>> df['MSDs'], df['Gauss'] = msd_calc(df) |
||
| 536 | >>> boundedness(df) |
||
| 537 | (0.96037058689895005, 2.9989749477908401, 0.03576118370932313) |
||
| 538 | """ |
||
| 539 | |||
| 540 | assert type(track) == pd.core.frame.DataFrame, "track must be a pandas dataframe." |
||
| 541 | assert type(track['MSDs']) == pd.core.series.Series, "track must contain MSDs column." |
||
| 542 | assert type(track['Frame']) == pd.core.series.Series, "track must contain Frame column." |
||
| 543 | assert track.shape[0] > 0, "track must not be empty." |
||
| 544 | |||
| 545 | df = track |
||
| 546 | |||
| 547 | if df.shape[0] > 2: |
||
| 548 | length = df.shape[0] |
||
| 549 | distance = np.zeros((length, length)) |
||
| 550 | |||
| 551 | for frame in range(0, length-1): |
||
| 552 | distance[frame, 0:length-frame-1] = (np.sqrt(msd.nth_diff(df['X'], frame+1)**2 + msd.nth_diff(df['Y'], frame+1)**2).values) |
||
| 553 | |||
| 554 | L = np.sum((np.sqrt(msd.nth_diff(df['X'], 1)**2 + msd.nth_diff(df['Y'], 1)**2).values)) |
||
| 555 | r = np.max(distance)/2 |
||
| 556 | N = df['Frame'][df['Frame'].shape[0]-1] |
||
| 557 | f = N*framerate |
||
| 558 | D = df['MSDs'][2]/(4*f) |
||
| 559 | |||
| 560 | B = D*f/(r**2) |
||
| 561 | Df = np.log(N)/np.log(N*2*r/L) |
||
| 562 | pf = 1 - np.exp(0.2048 - 0.25117*(D*f/(r**2))) |
||
| 563 | else: |
||
| 564 | B = np.nan |
||
| 565 | Df = np.nan |
||
| 566 | pf = np.nan |
||
| 567 | |||
| 568 | return B, Df, pf |
||
| 569 | |||
| 570 | |||
| 571 | View Code Duplication | def efficiency(track): |
|
| 572 | """ |
||
| 573 | Calculates the efficiency and straitness of the input track |
||
| 574 | |||
| 575 | Parameters |
||
| 576 | ---------- |
||
| 577 | track : pandas DataFrame |
||
| 578 | At a minimum, must contain a Frames and a MSDs column. The function |
||
| 579 | msd_calc can be used to generate the correctly formatted pd dataframe. |
||
| 580 | |||
| 581 | Returns |
||
| 582 | ------- |
||
| 583 | eff : numpy.float64 |
||
| 584 | Efficiency of the input track. Relates the sum of squared step |
||
| 585 | lengths. Based on Helmuth et al. (2007) and defined as: |
||
| 586 | E = |x(N-1)-x(0)|**2/SUM(|x(i) - x(i-1)|**2 |
||
| 587 | strait : numpy.float64 |
||
| 588 | Relates the net displacement L to teh sum of step lengths and is |
||
| 589 | defined as: |
||
| 590 | S = |x(N-1)-x(0)|/SUM(|x(i) - x(i-1)| |
||
| 591 | |||
| 592 | Examples |
||
| 593 | -------- |
||
| 594 | >>> frames = 10 |
||
| 595 | >>> d = {'Frame': np.linspace(1, frames, frames), |
||
| 596 | 'X': np.linspace(1, frames, frames)+5, |
||
| 597 | 'Y': np.linspace(1, frames, frames)+3} |
||
| 598 | >>> df = pd.DataFrame(data=d) |
||
| 599 | >>> df['MSDs'], df['Gauss'] = msd_calc(df) |
||
| 600 | >>> ft.efficiency(df) |
||
| 601 | (9.0, 0.9999999999999999) |
||
| 602 | |||
| 603 | >>> frames = 10 |
||
| 604 | >>> d = {'Frame': np.linspace(1, frames, frames), |
||
| 605 | 'X': np.sin(np.linspace(1, frames, frames))+3, |
||
| 606 | 'Y': np.cos(np.linspace(1, frames, frames))+3} |
||
| 607 | >>> df = pd.DataFrame(data=d) |
||
| 608 | >>> df['MSDs'], df['Gauss'] = msd_calc(df) |
||
| 609 | >>> ft.efficiency(df) |
||
| 610 | (0.46192924086141945, 0.22655125514290225) |
||
| 611 | """ |
||
| 612 | |||
| 613 | df = track |
||
| 614 | length = df.shape[0] |
||
| 615 | num = (msd.nth_diff(df['X'], length-1)**2 + msd.nth_diff(df['Y'], length-1)**2)[0] |
||
| 616 | num2 = np.sqrt(num) |
||
| 617 | |||
| 618 | den = np.sum(msd.nth_diff(df['X'], 1)**2 + msd.nth_diff(df['Y'], 1)**2) |
||
| 619 | den2 = np.sum(np.sqrt(msd.nth_diff(df['X'], 1)**2 + msd.nth_diff(df['Y'], 1)**2)) |
||
| 620 | |||
| 621 | eff = num/den |
||
| 622 | strait = num2/den2 |
||
| 623 | return eff, strait |
||
| 624 | |||
| 625 | |||
| 626 | View Code Duplication | def msd_ratio(track, n1=3, n2=100): |
|
| 627 | """ |
||
| 628 | Calculates the MSD ratio of the input track at the specified frames. |
||
| 629 | |||
| 630 | Parameters |
||
| 631 | ---------- |
||
| 632 | track : pandas DataFrame |
||
| 633 | At a minimum, must contain a Frames and a MSDs column. The function |
||
| 634 | msd_calc can be used to generate the correctly formatted pd dataframe. |
||
| 635 | n1 : int |
||
| 636 | First frame at which to calculate the MSD ratio. |
||
| 637 | n2 : int |
||
| 638 | Last frame at which to calculate the MSD ratio. |
||
| 639 | |||
| 640 | Returns |
||
| 641 | ------- |
||
| 642 | ratio: numpy.float64 |
||
| 643 | MSD ratio as defined by |
||
| 644 | [MSD(n1)/MSD(n2)] - [n1/n2] |
||
| 645 | where n1 < n2. For Brownian motion, it is 0; for restricted motion it |
||
| 646 | is < 0. For directed motion it is > 0. |
||
| 647 | |||
| 648 | Examples |
||
| 649 | -------- |
||
| 650 | >>> frames = 10 |
||
| 651 | >>> d = {'Frame': np.linspace(1, frames, frames), |
||
| 652 | 'X': np.linspace(1, frames, frames)+5, |
||
| 653 | 'Y': np.linspace(1, frames, frames)+3} |
||
| 654 | >>> df = pd.DataFrame(data=d) |
||
| 655 | >>> df['MSDs'], df['Gauss'] = msd_calc(df) |
||
| 656 | >>> ft.msd_ratio(df, 1, 9) |
||
| 657 | -0.18765432098765433 |
||
| 658 | |||
| 659 | >>> frames = 10 |
||
| 660 | >>> d = {'Frame': np.linspace(1, frames, frames), |
||
| 661 | 'X': np.sin(np.linspace(1, frames, frames))+3, |
||
| 662 | 'Y': np.cos(np.linspace(1, frames, frames))+3} |
||
| 663 | >>> df = pd.DataFrame(data=d) |
||
| 664 | >>> df['MSDs'], df['Gauss'] = msd_calc(df) |
||
| 665 | >>> ft.msd_ratio(df, 1, 9) |
||
| 666 | 0.04053708075268797 |
||
| 667 | """ |
||
| 668 | |||
| 669 | df = track |
||
| 670 | assert n1 < n2, "n1 must be less than n2" |
||
| 671 | ratio = (df['MSDs'][n1]/df['MSDs'][n2]) - (df['Frame'][n1]/df['Frame'][n2]) |
||
| 672 | return ratio |
||
| 673 | |||
| 674 | |||
| 675 | View Code Duplication | def calculate_features(df, framerate=1): |
|
| 676 | """ |
||
| 677 | Calculates multiple features from input MSD dataset and stores in pandas dataframe. |
||
| 678 | |||
| 679 | Parameters |
||
| 680 | ---------- |
||
| 681 | df : pandas dataframe |
||
| 682 | Output from msd.all_msds2. Must have at a minimum the following columns: |
||
| 683 | Track_ID, Frame, X, Y, and MSDs. |
||
| 684 | framerate : int or float64 |
||
| 685 | Framerate of the input videos from which trajectories were calculated. Required |
||
| 686 | for accurate calculation of some features. Default is 1. Possibly not required. |
||
| 687 | Ignore if performing all calcuations without units. |
||
| 688 | |||
| 689 | Returns |
||
| 690 | ------- |
||
| 691 | di: pandas dataframe |
||
| 692 | Contains a row for each trajectory in df. Holds the following features of each |
||
| 693 | trajetory: Track_ID, alpha, D_fit, kurtosis, asymmetry1, asymmetry2, asymmetry3, |
||
| 694 | aspect ratio (AR), elongation, boundedness, fractal dimension (fractal_dim), |
||
| 695 | trappedness, efficiency, straightness, MSD ratio, frames, X, and Y. |
||
| 696 | |||
| 697 | Examples |
||
| 698 | -------- |
||
| 699 | See example outputs from individual feature functions. |
||
| 700 | """ |
||
| 701 | # Skeleton of Trajectory features metadata table. |
||
| 702 | # Builds entry for each unique Track ID. |
||
| 703 | holder = df.Track_ID.unique().astype(float) |
||
| 704 | die = {'Track_ID': holder, |
||
| 705 | 'alpha': holder, |
||
| 706 | 'D_fit': holder, |
||
| 707 | 'kurtosis': holder, |
||
| 708 | 'asymmetry1': holder, |
||
| 709 | 'asymmetry2': holder, |
||
| 710 | 'asymmetry3': holder, |
||
| 711 | 'AR': holder, |
||
| 712 | 'elongation': holder, |
||
| 713 | 'boundedness': holder, |
||
| 714 | 'fractal_dim': holder, |
||
| 715 | 'trappedness': holder, |
||
| 716 | 'efficiency': holder, |
||
| 717 | 'straightness': holder, |
||
| 718 | 'MSD_ratio': holder, |
||
| 719 | 'frames': holder, |
||
| 720 | 'X': holder, |
||
| 721 | 'Y': holder} |
||
| 722 | |||
| 723 | di = pd.DataFrame(data=die) |
||
| 724 | |||
| 725 | trackids = df.Track_ID.unique() |
||
| 726 | partcount = trackids.shape[0] |
||
| 727 | |||
| 728 | for particle in range(0, partcount): |
||
| 729 | single_track_masked = df.loc[df['Track_ID'] == trackids[particle]].sort_values(['Track_ID', 'Frame'], |
||
| 730 | ascending=[1, 1]).reset_index(drop=True) |
||
| 731 | single_track = unmask_track(single_track_masked) |
||
| 732 | di['alpha'][particle], di['D_fit'][particle] = alpha_calc(single_track) |
||
| 733 | di['kurtosis'][particle] = kurtosis(single_track) |
||
| 734 | l1, l2, di['asymmetry1'][particle], di['asymmetry2'][particle], di['asymmetry3'][particle] = asymmetry(single_track) |
||
| 735 | di['AR'][particle], di['elongation'][particle], (di['X'][particle], di['Y'][particle]) = aspectratio(single_track) |
||
| 736 | di['boundedness'][particle], di['fractal_dim'][particle], di['trappedness'][particle] = boundedness(single_track, framerate) |
||
| 737 | di['efficiency'][particle], di['straightness'][particle] = efficiency(single_track) |
||
| 738 | di['frames'][particle] = single_track.shape[0] |
||
| 739 | if single_track['Frame'][single_track.shape[0]-2] > 2: |
||
| 740 | di['MSD_ratio'][particle] = msd_ratio(single_track, 2, single_track['Frame'][single_track.shape[0]-2]) |
||
| 741 | else: |
||
| 742 | di['MSD_ratio'][particle] = 0 |
||
| 743 | |||
| 744 | return di |
||
| 745 |
The coding style of this project requires that you add a docstring to this code element. Below, you find an example for methods:
If you would like to know more about docstrings, we recommend to read PEP-257: Docstring Conventions.