|
@@ 2259-2301 (lines=43) @@
|
| 2256 |
|
return mismatch_cost |
| 2257 |
|
|
| 2258 |
|
|
| 2259 |
|
def needleman_wunsch(src, tar, gap_cost=1, sim_func=sim_ident): |
| 2260 |
|
"""Return the Needleman-Wunsch score of two strings. |
| 2261 |
|
|
| 2262 |
|
Needleman-Wunsch score |
| 2263 |
|
|
| 2264 |
|
This is the standard edit distance measure. |
| 2265 |
|
|
| 2266 |
|
Cf. https://en.wikipedia.org/wiki/Needleman–Wunsch_algorithm |
| 2267 |
|
|
| 2268 |
|
Cf. |
| 2269 |
|
http://csb.stanford.edu/class/public/readings/Bioinformatics_I_Lecture6/Needleman_Wunsch_JMB_70_Global_alignment.pdf |
| 2270 |
|
|
| 2271 |
|
:param str src, tar: two strings to be compared |
| 2272 |
|
:param float gap_cost: the cost of an alignment gap (1 by default) |
| 2273 |
|
:param function sim_func: a function that returns the similarity of two |
| 2274 |
|
characters (identity similarity by default) |
| 2275 |
|
:returns: Needleman-Wunsch score |
| 2276 |
|
:rtype: int (in fact dependent on the gap_cost & return value of sim_func) |
| 2277 |
|
|
| 2278 |
|
>>> needleman_wunsch('cat', 'hat') |
| 2279 |
|
2.0 |
| 2280 |
|
>>> needleman_wunsch('Niall', 'Neil') |
| 2281 |
|
1.0 |
| 2282 |
|
>>> needleman_wunsch('aluminum', 'Catalan') |
| 2283 |
|
-1.0 |
| 2284 |
|
>>> needleman_wunsch('ATCG', 'TAGC') |
| 2285 |
|
0.0 |
| 2286 |
|
""" |
| 2287 |
|
# pylint: disable=no-member |
| 2288 |
|
d_mat = np.zeros((len(src)+1, len(tar)+1), dtype=np.float) |
| 2289 |
|
# pylint: enable=no-member |
| 2290 |
|
|
| 2291 |
|
for i in range(len(src)+1): |
| 2292 |
|
d_mat[i, 0] = -(i * gap_cost) |
| 2293 |
|
for j in range(len(tar)+1): |
| 2294 |
|
d_mat[0, j] = -(j * gap_cost) |
| 2295 |
|
for i in range(1, len(src)+1): |
| 2296 |
|
for j in range(1, len(tar)+1): |
| 2297 |
|
match = d_mat[i-1, j-1] + sim_func(src[i-1], tar[j-1]) |
| 2298 |
|
delete = d_mat[i-1, j] - gap_cost |
| 2299 |
|
insert = d_mat[i, j-1] - gap_cost |
| 2300 |
|
d_mat[i, j] = max(match, delete, insert) |
| 2301 |
|
return d_mat[d_mat.shape[0]-1, d_mat.shape[1]-1] |
| 2302 |
|
|
| 2303 |
|
|
| 2304 |
|
def smith_waterman(src, tar, gap_cost=1, sim_func=sim_ident): |
|
@@ 2304-2343 (lines=40) @@
|
| 2301 |
|
return d_mat[d_mat.shape[0]-1, d_mat.shape[1]-1] |
| 2302 |
|
|
| 2303 |
|
|
| 2304 |
|
def smith_waterman(src, tar, gap_cost=1, sim_func=sim_ident): |
| 2305 |
|
"""Return the Smith-Waterman score of two strings. |
| 2306 |
|
|
| 2307 |
|
Smith-Waterman score |
| 2308 |
|
|
| 2309 |
|
This is the standard edit distance measure. |
| 2310 |
|
|
| 2311 |
|
Cf. https://en.wikipedia.org/wiki/Smith–Waterman_algorithm |
| 2312 |
|
|
| 2313 |
|
:param str src, tar: two strings to be compared |
| 2314 |
|
:param float gap_cost: the cost of an alignment gap (1 by default) |
| 2315 |
|
:param function sim_func: a function that returns the similarity of two |
| 2316 |
|
characters (identity similarity by default) |
| 2317 |
|
:returns: Smith-Waterman score |
| 2318 |
|
:rtype: int (in fact dependent on the gap_cost & return value of sim_func) |
| 2319 |
|
|
| 2320 |
|
>>> smith_waterman('cat', 'hat') |
| 2321 |
|
2.0 |
| 2322 |
|
>>> smith_waterman('Niall', 'Neil') |
| 2323 |
|
1.0 |
| 2324 |
|
>>> smith_waterman('aluminum', 'Catalan') |
| 2325 |
|
0.0 |
| 2326 |
|
>>> smith_waterman('ATCG', 'TAGC') |
| 2327 |
|
1.0 |
| 2328 |
|
""" |
| 2329 |
|
# pylint: disable=no-member |
| 2330 |
|
d_mat = np.zeros((len(src)+1, len(tar)+1), dtype=np.float) |
| 2331 |
|
# pylint: enable=no-member |
| 2332 |
|
|
| 2333 |
|
for i in range(len(src)+1): |
| 2334 |
|
d_mat[i, 0] = 0 |
| 2335 |
|
for j in range(len(tar)+1): |
| 2336 |
|
d_mat[0, j] = 0 |
| 2337 |
|
for i in range(1, len(src)+1): |
| 2338 |
|
for j in range(1, len(tar)+1): |
| 2339 |
|
match = d_mat[i-1, j-1] + sim_func(src[i-1], tar[j-1]) |
| 2340 |
|
delete = d_mat[i-1, j] - gap_cost |
| 2341 |
|
insert = d_mat[i, j-1] - gap_cost |
| 2342 |
|
d_mat[i, j] = max(0, match, delete, insert) |
| 2343 |
|
return d_mat[d_mat.shape[0]-1, d_mat.shape[1]-1] |
| 2344 |
|
|
| 2345 |
|
|
| 2346 |
|
def gotoh(src, tar, gap_open=1, gap_ext=0.4, sim_func=sim_ident): |