@@ 2285-2322 (lines=38) @@ | ||
2282 | return d_mat[d_mat.shape[0]-1, d_mat.shape[1]-1] |
|
2283 | ||
2284 | ||
2285 | def smith_waterman(src, tar, gap_cost=1, sim_func=sim_ident): |
|
2286 | """Return the Smith-Waterman score of two strings. |
|
2287 | ||
2288 | The Smith-Waterman score :cite:`Smith:1981` is a standard edit distance |
|
2289 | measure, differing from Needleman-Wunsch in that it focuses on local |
|
2290 | alignment and disallows negative scores. |
|
2291 | ||
2292 | :param str src, tar: two strings to be compared |
|
2293 | :param float gap_cost: the cost of an alignment gap (1 by default) |
|
2294 | :param function sim_func: a function that returns the similarity of two |
|
2295 | characters (identity similarity by default) |
|
2296 | :returns: Smith-Waterman score |
|
2297 | :rtype: int (in fact dependent on the gap_cost & return value of sim_func) |
|
2298 | ||
2299 | >>> smith_waterman('cat', 'hat') |
|
2300 | 2.0 |
|
2301 | >>> smith_waterman('Niall', 'Neil') |
|
2302 | 1.0 |
|
2303 | >>> smith_waterman('aluminum', 'Catalan') |
|
2304 | 0.0 |
|
2305 | >>> smith_waterman('ATCG', 'TAGC') |
|
2306 | 1.0 |
|
2307 | """ |
|
2308 | # pylint: disable=no-member |
|
2309 | d_mat = np_zeros((len(src)+1, len(tar)+1), dtype=np_float32) |
|
2310 | # pylint: enable=no-member |
|
2311 | ||
2312 | for i in range(len(src)+1): |
|
2313 | d_mat[i, 0] = 0 |
|
2314 | for j in range(len(tar)+1): |
|
2315 | d_mat[0, j] = 0 |
|
2316 | for i in range(1, len(src)+1): |
|
2317 | for j in range(1, len(tar)+1): |
|
2318 | match = d_mat[i-1, j-1] + sim_func(src[i-1], tar[j-1]) |
|
2319 | delete = d_mat[i-1, j] - gap_cost |
|
2320 | insert = d_mat[i, j-1] - gap_cost |
|
2321 | d_mat[i, j] = max(0, match, delete, insert) |
|
2322 | return d_mat[d_mat.shape[0]-1, d_mat.shape[1]-1] |
|
2323 | ||
2324 | ||
2325 | def gotoh(src, tar, gap_open=1, gap_ext=0.4, sim_func=sim_ident): |
|
@@ 2246-2282 (lines=37) @@ | ||
2243 | return mismatch_cost |
|
2244 | ||
2245 | ||
2246 | def needleman_wunsch(src, tar, gap_cost=1, sim_func=sim_ident): |
|
2247 | """Return the Needleman-Wunsch score of two strings. |
|
2248 | ||
2249 | The Needleman-Wunsch score :cite:`Needleman:1970` is a standard edit |
|
2250 | distance measure. |
|
2251 | ||
2252 | :param str src, tar: two strings to be compared |
|
2253 | :param float gap_cost: the cost of an alignment gap (1 by default) |
|
2254 | :param function sim_func: a function that returns the similarity of two |
|
2255 | characters (identity similarity by default) |
|
2256 | :returns: Needleman-Wunsch score |
|
2257 | :rtype: int (in fact dependent on the gap_cost & return value of sim_func) |
|
2258 | ||
2259 | >>> needleman_wunsch('cat', 'hat') |
|
2260 | 2.0 |
|
2261 | >>> needleman_wunsch('Niall', 'Neil') |
|
2262 | 1.0 |
|
2263 | >>> needleman_wunsch('aluminum', 'Catalan') |
|
2264 | -1.0 |
|
2265 | >>> needleman_wunsch('ATCG', 'TAGC') |
|
2266 | 0.0 |
|
2267 | """ |
|
2268 | # pylint: disable=no-member |
|
2269 | d_mat = np_zeros((len(src)+1, len(tar)+1), dtype=np_float32) |
|
2270 | # pylint: enable=no-member |
|
2271 | ||
2272 | for i in range(len(src)+1): |
|
2273 | d_mat[i, 0] = -(i * gap_cost) |
|
2274 | for j in range(len(tar)+1): |
|
2275 | d_mat[0, j] = -(j * gap_cost) |
|
2276 | for i in range(1, len(src)+1): |
|
2277 | for j in range(1, len(tar)+1): |
|
2278 | match = d_mat[i-1, j-1] + sim_func(src[i-1], tar[j-1]) |
|
2279 | delete = d_mat[i-1, j] - gap_cost |
|
2280 | insert = d_mat[i, j-1] - gap_cost |
|
2281 | d_mat[i, j] = max(match, delete, insert) |
|
2282 | return d_mat[d_mat.shape[0]-1, d_mat.shape[1]-1] |
|
2283 | ||
2284 | ||
2285 | def smith_waterman(src, tar, gap_cost=1, sim_func=sim_ident): |