|
@@ 201-234 (lines=34) @@
|
| 198 |
|
alignment and disallows negative scores. |
| 199 |
|
""" |
| 200 |
|
|
| 201 |
|
def dist_abs(self, src, tar, gap_cost=1, sim_func=sim_ident): |
| 202 |
|
"""Return the Smith-Waterman score of two strings. |
| 203 |
|
|
| 204 |
|
Args: |
| 205 |
|
src (str): Source string for comparison |
| 206 |
|
tar (str): Target string for comparison |
| 207 |
|
gap_cost (float): the cost of an alignment gap (1 by default) |
| 208 |
|
sim_func (function): a function that returns the similarity of two |
| 209 |
|
characters (identity similarity by default) |
| 210 |
|
|
| 211 |
|
Returns: |
| 212 |
|
float: Smith-Waterman score |
| 213 |
|
|
| 214 |
|
Examples: |
| 215 |
|
>>> cmp = SmithWaterman() |
| 216 |
|
>>> cmp.dist_abs('cat', 'hat') |
| 217 |
|
2.0 |
| 218 |
|
>>> cmp.dist_abs('Niall', 'Neil') |
| 219 |
|
1.0 |
| 220 |
|
>>> cmp.dist_abs('aluminum', 'Catalan') |
| 221 |
|
0.0 |
| 222 |
|
>>> cmp.dist_abs('ATCG', 'TAGC') |
| 223 |
|
1.0 |
| 224 |
|
|
| 225 |
|
""" |
| 226 |
|
d_mat = np_zeros((len(src) + 1, len(tar) + 1), dtype=np_float32) |
| 227 |
|
|
| 228 |
|
for i in range(len(src) + 1): |
| 229 |
|
d_mat[i, 0] = 0 |
| 230 |
|
for j in range(len(tar) + 1): |
| 231 |
|
d_mat[0, j] = 0 |
| 232 |
|
for i in range(1, len(src) + 1): |
| 233 |
|
for j in range(1, len(tar) + 1): |
| 234 |
|
match = d_mat[i - 1, j - 1] + sim_func(src[i - 1], tar[j - 1]) |
| 235 |
|
delete = d_mat[i - 1, j] - gap_cost |
| 236 |
|
insert = d_mat[i, j - 1] - gap_cost |
| 237 |
|
d_mat[i, j] = max(0, match, delete, insert) |
|
@@ 124-157 (lines=34) @@
|
| 121 |
|
return mat[(tar, src)] |
| 122 |
|
return mismatch_cost |
| 123 |
|
|
| 124 |
|
def dist_abs(self, src, tar, gap_cost=1, sim_func=sim_ident): |
| 125 |
|
"""Return the Needleman-Wunsch score of two strings. |
| 126 |
|
|
| 127 |
|
Args: |
| 128 |
|
src (str): Source string for comparison |
| 129 |
|
tar (str): Target string for comparison |
| 130 |
|
gap_cost (float): the cost of an alignment gap (1 by default) |
| 131 |
|
sim_func (function): a function that returns the similarity of two |
| 132 |
|
characters (identity similarity by default) |
| 133 |
|
|
| 134 |
|
Returns: |
| 135 |
|
float: Needleman-Wunsch score |
| 136 |
|
|
| 137 |
|
Examples: |
| 138 |
|
>>> cmp = NeedlemanWunsch() |
| 139 |
|
>>> cmp.dist_abs('cat', 'hat') |
| 140 |
|
2.0 |
| 141 |
|
>>> cmp.dist_abs('Niall', 'Neil') |
| 142 |
|
1.0 |
| 143 |
|
>>> cmp.dist_abs('aluminum', 'Catalan') |
| 144 |
|
-1.0 |
| 145 |
|
>>> cmp.dist_abs('ATCG', 'TAGC') |
| 146 |
|
0.0 |
| 147 |
|
|
| 148 |
|
""" |
| 149 |
|
d_mat = np_zeros((len(src) + 1, len(tar) + 1), dtype=np_float32) |
| 150 |
|
|
| 151 |
|
for i in range(len(src) + 1): |
| 152 |
|
d_mat[i, 0] = -(i * gap_cost) |
| 153 |
|
for j in range(len(tar) + 1): |
| 154 |
|
d_mat[0, j] = -(j * gap_cost) |
| 155 |
|
for i in range(1, len(src) + 1): |
| 156 |
|
for j in range(1, len(tar) + 1): |
| 157 |
|
match = d_mat[i - 1, j - 1] + sim_func(src[i - 1], tar[j - 1]) |
| 158 |
|
delete = d_mat[i - 1, j] - gap_cost |
| 159 |
|
insert = d_mat[i, j - 1] - gap_cost |
| 160 |
|
d_mat[i, j] = max(match, delete, insert) |