|
@@ 184-217 (lines=34) @@
|
| 181 |
|
alignment and disallows negative scores. |
| 182 |
|
""" |
| 183 |
|
|
| 184 |
|
def dist_abs(self, src, tar, gap_cost=1, sim_func=sim_ident):
    """Return the Smith-Waterman score of two strings.

    A (len(src) + 1) x (len(tar) + 1) score matrix is filled in, with
    every cell clamped so that it never drops below 0. The first row and
    first column remain 0. The value returned is the bottom-right cell of
    that matrix (not the matrix-wide maximum).

    :param str src: source string for comparison
    :param str tar: target string for comparison
    :param float gap_cost: the cost of an alignment gap (1 by default)
    :param function sim_func: a function that returns the similarity of two
        characters (identity similarity by default)
    :returns: Smith-Waterman score
    :rtype: float

    >>> cmp = SmithWaterman()
    >>> cmp.dist_abs('cat', 'hat')
    2.0
    >>> cmp.dist_abs('Niall', 'Neil')
    1.0
    >>> cmp.dist_abs('aluminum', 'Catalan')
    0.0
    >>> cmp.dist_abs('ATCG', 'TAGC')
    1.0
    """
    src_len = len(src)
    tar_len = len(tar)

    # np_zeros already yields an all-zero matrix, so the zero-valued first
    # row and first column need no explicit initialisation.
    d_mat = np_zeros((src_len + 1, tar_len + 1), dtype=np_float32)

    for i in range(1, src_len + 1):
        for j in range(1, tar_len + 1):
            # Best score reaching (i, j) by aligning the two characters,
            # or by opening a gap in either string.
            match = d_mat[i - 1, j - 1] + sim_func(src[i - 1], tar[j - 1])
            delete = d_mat[i - 1, j] - gap_cost
            insert = d_mat[i, j - 1] - gap_cost
            # The 0 floor is what disallows negative scores.
            d_mat[i, j] = max(0, match, delete, insert)

    # Bottom-right cell of the matrix.
    return d_mat[src_len, tar_len]
| 218 |
|
|
| 219 |
|
|
| 220 |
|
def smith_waterman(src, tar, gap_cost=1, sim_func=sim_ident): |
|
@@ 115-148 (lines=34) @@
|
| 112 |
|
return mat[(tar, src)] |
| 113 |
|
return mismatch_cost |
| 114 |
|
|
| 115 |
|
def dist_abs(self, src, tar, gap_cost=1, sim_func=sim_ident):
    """Return the Needleman-Wunsch score of two strings.

    A (len(src) + 1) x (len(tar) + 1) score matrix is built. Its first
    column and first row hold the cumulative gap penalties, and every
    other cell takes the best of the diagonal (character comparison),
    upper (gap) and left (gap) predecessors. The bottom-right cell is
    returned.

    :param str src: source string for comparison
    :param str tar: target string for comparison
    :param float gap_cost: the cost of an alignment gap (1 by default)
    :param function sim_func: a function that returns the similarity of two
        characters (identity similarity by default)
    :returns: Needleman-Wunsch score
    :rtype: float

    >>> cmp = NeedlemanWunsch()
    >>> cmp.dist_abs('cat', 'hat')
    2.0
    >>> cmp.dist_abs('Niall', 'Neil')
    1.0
    >>> cmp.dist_abs('aluminum', 'Catalan')
    -1.0
    >>> cmp.dist_abs('ATCG', 'TAGC')
    0.0
    """
    n_rows = len(src) + 1
    n_cols = len(tar) + 1
    d_mat = np_zeros((n_rows, n_cols), dtype=np_float32)

    # Border cells: aligning a prefix against nothing costs one gap per
    # character.
    for row in range(n_rows):
        d_mat[row, 0] = -(row * gap_cost)
    for col in range(n_cols):
        d_mat[0, col] = -(col * gap_cost)

    for row, src_char in enumerate(src, start=1):
        for col, tar_char in enumerate(tar, start=1):
            diagonal = d_mat[row - 1, col - 1] + sim_func(src_char, tar_char)
            from_above = d_mat[row - 1, col] - gap_cost
            from_left = d_mat[row, col - 1] - gap_cost
            d_mat[row, col] = max(diagonal, from_above, from_left)

    # Bottom-right cell of the matrix.
    return d_mat[-1, -1]
| 149 |
|
|
| 150 |
|
|
| 151 |
|
def needleman_wunsch(src, tar, gap_cost=1, sim_func=sim_ident): |