1 | # -*- coding: utf-8 -*- |
||
2 | |||
3 | # Copyright 2014-2018 by Christopher C. Little. |
||
4 | # This file is part of Abydos. |
||
5 | # |
||
6 | # Abydos is free software: you can redistribute it and/or modify |
||
7 | # it under the terms of the GNU General Public License as published by |
||
8 | # the Free Software Foundation, either version 3 of the License, or |
||
9 | # (at your option) any later version. |
||
10 | # |
||
11 | # Abydos is distributed in the hope that it will be useful, |
||
12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||
14 | # GNU General Public License for more details. |
||
15 | # |
||
16 | # You should have received a copy of the GNU General Public License |
||
17 | # along with Abydos. If not, see <http://www.gnu.org/licenses/>. |
||
18 | |||
19 | 1 | """abydos.distance._smith_waterman. |
|
20 | |||
21 | Smith-Waterman score |
||
22 | """ |
||
23 | |||
24 | 1 | from __future__ import ( |
|
25 | absolute_import, |
||
26 | division, |
||
27 | print_function, |
||
28 | unicode_literals, |
||
29 | ) |
||
30 | |||
31 | 1 | from numpy import float32 as np_float32 |
|
32 | 1 | from numpy import zeros as np_zeros |
|
33 | |||
34 | 1 | from six.moves import range |
|
35 | |||
36 | 1 | from ._ident import sim_ident |
|
37 | 1 | from ._needleman_wunsch import NeedlemanWunsch |
|
38 | |||
39 | 1 | __all__ = ['SmithWaterman', 'smith_waterman'] |
|
40 | |||
41 | |||
42 | 1 | class SmithWaterman(NeedlemanWunsch): |
|
43 | """Smith-Waterman score. |
||
44 | |||
45 | The Smith-Waterman score :cite:`Smith:1981` is a standard edit distance |
||
46 | measure, differing from Needleman-Wunsch in that it focuses on local |
||
47 | alignment and disallows negative scores. |
||
48 | """ |
||
49 | |||
50 | 1 | View Code Duplication | def dist_abs(self, src, tar, gap_cost=1, sim_func=sim_ident): |
0 ignored issues
–
show
Duplication
introduced
by
![]() |
|||
51 | """Return the Smith-Waterman score of two strings. |
||
52 | |||
53 | Parameters |
||
54 | ---------- |
||
55 | src : str |
||
56 | Source string for comparison |
||
57 | tar : str |
||
58 | Target string for comparison |
||
59 | gap_cost : float |
||
60 | The cost of an alignment gap (1 by default) |
||
61 | sim_func : function |
||
62 | A function that returns the similarity of two characters (identity |
||
63 | similarity by default) |
||
64 | |||
65 | Returns |
||
66 | ------- |
||
67 | float |
||
68 | Smith-Waterman score |
||
69 | |||
70 | Examples |
||
71 | -------- |
||
72 | >>> cmp = SmithWaterman() |
||
73 | >>> cmp.dist_abs('cat', 'hat') |
||
74 | 2.0 |
||
75 | >>> cmp.dist_abs('Niall', 'Neil') |
||
76 | 1.0 |
||
77 | >>> cmp.dist_abs('aluminum', 'Catalan') |
||
78 | 0.0 |
||
79 | >>> cmp.dist_abs('ATCG', 'TAGC') |
||
80 | 1.0 |
||
81 | |||
82 | """ |
||
83 | 1 | d_mat = np_zeros((len(src) + 1, len(tar) + 1), dtype=np_float32) |
|
84 | |||
85 | 1 | for i in range(len(src) + 1): |
|
86 | 1 | d_mat[i, 0] = 0 |
|
87 | 1 | for j in range(len(tar) + 1): |
|
88 | 1 | d_mat[0, j] = 0 |
|
89 | 1 | for i in range(1, len(src) + 1): |
|
90 | 1 | for j in range(1, len(tar) + 1): |
|
91 | 1 | match = d_mat[i - 1, j - 1] + sim_func(src[i - 1], tar[j - 1]) |
|
92 | 1 | delete = d_mat[i - 1, j] - gap_cost |
|
93 | 1 | insert = d_mat[i, j - 1] - gap_cost |
|
94 | 1 | d_mat[i, j] = max(0, match, delete, insert) |
|
95 | 1 | return d_mat[d_mat.shape[0] - 1, d_mat.shape[1] - 1] |
|
96 | |||
97 | |||
98 | 1 | def smith_waterman(src, tar, gap_cost=1, sim_func=sim_ident): |
|
99 | """Return the Smith-Waterman score of two strings. |
||
100 | |||
101 | This is a wrapper for :py:meth:`SmithWaterman.dist_abs`. |
||
102 | |||
103 | Parameters |
||
104 | ---------- |
||
105 | src : str |
||
106 | Source string for comparison |
||
107 | tar : str |
||
108 | Target string for comparison |
||
109 | gap_cost : float |
||
110 | The cost of an alignment gap (1 by default) |
||
111 | sim_func : function |
||
112 | A function that returns the similarity of two characters (identity |
||
113 | similarity by default) |
||
114 | |||
115 | Returns |
||
116 | ------- |
||
117 | float |
||
118 | Smith-Waterman score |
||
119 | |||
120 | Examples |
||
121 | -------- |
||
122 | >>> smith_waterman('cat', 'hat') |
||
123 | 2.0 |
||
124 | >>> smith_waterman('Niall', 'Neil') |
||
125 | 1.0 |
||
126 | >>> smith_waterman('aluminum', 'Catalan') |
||
127 | 0.0 |
||
128 | >>> smith_waterman('ATCG', 'TAGC') |
||
129 | 1.0 |
||
130 | |||
131 | """ |
||
132 | 1 | return SmithWaterman().dist_abs(src, tar, gap_cost, sim_func) |
|
133 | |||
134 | |||
135 | if __name__ == '__main__': |
||
136 | import doctest |
||
137 | |||
138 | doctest.testmod() |
||
139 |