@@ 2409-2446 (lines=38) @@ | ||
2406 | return d_mat[d_mat.shape[0]-1, d_mat.shape[1]-1] |
|
2407 | ||
2408 | ||
2409 | def smith_waterman(src, tar, gap_cost=1, sim_func=sim_ident): |
|
2410 | """Return the Smith-Waterman score of two strings. |
|
2411 | ||
2412 | The Smith-Waterman score :cite:`Smith:1981` is a standard edit distance |
|
2413 | measure, differing from Needleman-Wunsch in that it focuses on local |
|
2414 | alignment and disallows negative scores. |
|
2415 | ||
2416 | :param str src: source string for comparison |
|
2417 | :param str tar: target string for comparison |
|
2418 | :param float gap_cost: the cost of an alignment gap (1 by default) |
|
2419 | :param function sim_func: a function that returns the similarity of two |
|
2420 | characters (identity similarity by default) |
|
2421 | :returns: Smith-Waterman score |
|
2422 | :rtype: float |
|
2423 | ||
2424 | >>> smith_waterman('cat', 'hat') |
|
2425 | 2.0 |
|
2426 | >>> smith_waterman('Niall', 'Neil') |
|
2427 | 1.0 |
|
2428 | >>> smith_waterman('aluminum', 'Catalan') |
|
2429 | 0.0 |
|
2430 | >>> smith_waterman('ATCG', 'TAGC') |
|
2431 | 1.0 |
|
2432 | """ |
|
2433 | d_mat = np_zeros((len(src)+1, len(tar)+1), dtype=np_float32) |
|
2434 | ||
2435 | for i in range(len(src)+1): |
|
2436 | d_mat[i, 0] = 0 |
|
2437 | for j in range(len(tar)+1): |
|
2438 | d_mat[0, j] = 0 |
|
2439 | for i in range(1, len(src)+1): |
|
2440 | for j in range(1, len(tar)+1): |
|
2441 | match = d_mat[i-1, j-1] + sim_func(src[i-1], tar[j-1]) |
|
2442 | delete = d_mat[i-1, j] - gap_cost |
|
2443 | insert = d_mat[i, j-1] - gap_cost |
|
2444 | d_mat[i, j] = max(0, match, delete, insert) |
|
2445 | return d_mat[d_mat.shape[0]-1, d_mat.shape[1]-1] |
|
2446 | ||
2447 | ||
2448 | def gotoh(src, tar, gap_open=1, gap_ext=0.4, sim_func=sim_ident): |
|
2449 | """Return the Gotoh score of two strings. |
|
@@ 2371-2407 (lines=37) @@ | ||
2368 | return mismatch_cost |
|
2369 | ||
2370 | ||
2371 | def needleman_wunsch(src, tar, gap_cost=1, sim_func=sim_ident): |
|
2372 | """Return the Needleman-Wunsch score of two strings. |
|
2373 | ||
2374 | The Needleman-Wunsch score :cite:`Needleman:1970` is a standard edit |
|
2375 | distance measure. |
|
2376 | ||
2377 | :param str src: source string for comparison |
|
2378 | :param str tar: target string for comparison |
|
2379 | :param float gap_cost: the cost of an alignment gap (1 by default) |
|
2380 | :param function sim_func: a function that returns the similarity of two |
|
2381 | characters (identity similarity by default) |
|
2382 | :returns: Needleman-Wunsch score |
|
2383 | :rtype: float |
|
2384 | ||
2385 | >>> needleman_wunsch('cat', 'hat') |
|
2386 | 2.0 |
|
2387 | >>> needleman_wunsch('Niall', 'Neil') |
|
2388 | 1.0 |
|
2389 | >>> needleman_wunsch('aluminum', 'Catalan') |
|
2390 | -1.0 |
|
2391 | >>> needleman_wunsch('ATCG', 'TAGC') |
|
2392 | 0.0 |
|
2393 | """ |
|
2394 | d_mat = np_zeros((len(src)+1, len(tar)+1), dtype=np_float32) |
|
2395 | ||
2396 | for i in range(len(src)+1): |
|
2397 | d_mat[i, 0] = -(i * gap_cost) |
|
2398 | for j in range(len(tar)+1): |
|
2399 | d_mat[0, j] = -(j * gap_cost) |
|
2400 | for i in range(1, len(src)+1): |
|
2401 | for j in range(1, len(tar)+1): |
|
2402 | match = d_mat[i-1, j-1] + sim_func(src[i-1], tar[j-1]) |
|
2403 | delete = d_mat[i-1, j] - gap_cost |
|
2404 | insert = d_mat[i, j-1] - gap_cost |
|
2405 | d_mat[i, j] = max(match, delete, insert) |
|
2406 | return d_mat[d_mat.shape[0]-1, d_mat.shape[1]-1] |
|
2407 | ||
2408 | ||
2409 | def smith_waterman(src, tar, gap_cost=1, sim_func=sim_ident): |
|
2410 | """Return the Smith-Waterman score of two strings. |