@@ 2313-2350 (lines=38) @@ | ||
2310 | return d_mat[d_mat.shape[0]-1, d_mat.shape[1]-1] |
|
2311 | ||
2312 | ||
2313 | def smith_waterman(src, tar, gap_cost=1, sim_func=sim_ident): |
|
2314 | """Return the Smith-Waterman score of two strings. |
|
2315 | ||
2316 | The Smith-Waterman score :cite:`Smith:1981` is a standard edit distance |
|
2317 | measure, differing from Needleman-Wunsch in that it focuses on local |
|
2318 | alignment and disallows negative scores. |
|
2319 | ||
2320 | :param str src, tar: two strings to be compared |
|
2321 | :param float gap_cost: the cost of an alignment gap (1 by default) |
|
2322 | :param function sim_func: a function that returns the similarity of two |
|
2323 | characters (identity similarity by default) |
|
2324 | :returns: Smith-Waterman score |
|
2325 | :rtype: int (in fact dependent on the gap_cost & return value of sim_func) |
|
2326 | ||
2327 | >>> smith_waterman('cat', 'hat') |
|
2328 | 2.0 |
|
2329 | >>> smith_waterman('Niall', 'Neil') |
|
2330 | 1.0 |
|
2331 | >>> smith_waterman('aluminum', 'Catalan') |
|
2332 | 0.0 |
|
2333 | >>> smith_waterman('ATCG', 'TAGC') |
|
2334 | 1.0 |
|
2335 | """ |
|
2336 | d_mat = np_zeros((len(src)+1, len(tar)+1), dtype=np_float32) |
|
2337 | ||
2338 | for i in range(len(src)+1): |
|
2339 | d_mat[i, 0] = 0 |
|
2340 | for j in range(len(tar)+1): |
|
2341 | d_mat[0, j] = 0 |
|
2342 | for i in range(1, len(src)+1): |
|
2343 | for j in range(1, len(tar)+1): |
|
2344 | match = d_mat[i-1, j-1] + sim_func(src[i-1], tar[j-1]) |
|
2345 | delete = d_mat[i-1, j] - gap_cost |
|
2346 | insert = d_mat[i, j-1] - gap_cost |
|
2347 | d_mat[i, j] = max(0, match, delete, insert) |
|
2348 | return d_mat[d_mat.shape[0]-1, d_mat.shape[1]-1] |
|
2349 | ||
2350 | ||
2351 | def gotoh(src, tar, gap_open=1, gap_ext=0.4, sim_func=sim_ident): |
|
2352 | """Return the Gotoh score of two strings. |
|
2353 | ||
@@ 2276-2312 (lines=37) @@ | ||
2273 | return mismatch_cost |
|
2274 | ||
2275 | ||
2276 | def needleman_wunsch(src, tar, gap_cost=1, sim_func=sim_ident): |
|
2277 | """Return the Needleman-Wunsch score of two strings. |
|
2278 | ||
2279 | The Needleman-Wunsch score :cite:`Needleman:1970` is a standard edit |
|
2280 | distance measure. |
|
2281 | ||
2282 | :param str src, tar: two strings to be compared |
|
2283 | :param float gap_cost: the cost of an alignment gap (1 by default) |
|
2284 | :param function sim_func: a function that returns the similarity of two |
|
2285 | characters (identity similarity by default) |
|
2286 | :returns: Needleman-Wunsch score |
|
2287 | :rtype: int (in fact dependent on the gap_cost & return value of sim_func) |
|
2288 | ||
2289 | >>> needleman_wunsch('cat', 'hat') |
|
2290 | 2.0 |
|
2291 | >>> needleman_wunsch('Niall', 'Neil') |
|
2292 | 1.0 |
|
2293 | >>> needleman_wunsch('aluminum', 'Catalan') |
|
2294 | -1.0 |
|
2295 | >>> needleman_wunsch('ATCG', 'TAGC') |
|
2296 | 0.0 |
|
2297 | """ |
|
2298 | d_mat = np_zeros((len(src)+1, len(tar)+1), dtype=np_float32) |
|
2299 | ||
2300 | for i in range(len(src)+1): |
|
2301 | d_mat[i, 0] = -(i * gap_cost) |
|
2302 | for j in range(len(tar)+1): |
|
2303 | d_mat[0, j] = -(j * gap_cost) |
|
2304 | for i in range(1, len(src)+1): |
|
2305 | for j in range(1, len(tar)+1): |
|
2306 | match = d_mat[i-1, j-1] + sim_func(src[i-1], tar[j-1]) |
|
2307 | delete = d_mat[i-1, j] - gap_cost |
|
2308 | insert = d_mat[i, j-1] - gap_cost |
|
2309 | d_mat[i, j] = max(match, delete, insert) |
|
2310 | return d_mat[d_mat.shape[0]-1, d_mat.shape[1]-1] |
|
2311 | ||
2312 | ||
2313 | def smith_waterman(src, tar, gap_cost=1, sim_func=sim_ident): |
|
2314 | """Return the Smith-Waterman score of two strings. |
|
2315 |