Code Duplication    Length = 37-38 lines in 2 locations

abydos/distance.py 2 locations

@@ 2285-2322 (lines=38) @@
2282
    return d_mat[d_mat.shape[0]-1, d_mat.shape[1]-1]
2283
2284
2285
def smith_waterman(src, tar, gap_cost=1, sim_func=sim_ident):
2286
    """Return the Smith-Waterman score of two strings.
2287
2288
    The Smith-Waterman score :cite:`Smith:1981` is a standard edit distance
2289
    measure, differing from Needleman-Wunsch in that it focuses on local
2290
    alignment and disallows negative scores.
2291
2292
    :param str src, tar: two strings to be compared
2293
    :param float gap_cost: the cost of an alignment gap (1 by default)
2294
    :param function sim_func: a function that returns the similarity of two
2295
        characters (identity similarity by default)
2296
    :returns: Smith-Waterman score
2297
    :rtype: int (in fact dependent on the gap_cost & return value of sim_func)
2298
2299
    >>> smith_waterman('cat', 'hat')
2300
    2.0
2301
    >>> smith_waterman('Niall', 'Neil')
2302
    1.0
2303
    >>> smith_waterman('aluminum', 'Catalan')
2304
    0.0
2305
    >>> smith_waterman('ATCG', 'TAGC')
2306
    1.0
2307
    """
2308
    # pylint: disable=no-member
2309
    d_mat = np_zeros((len(src)+1, len(tar)+1), dtype=np_float32)
2310
    # pylint: enable=no-member
2311
2312
    for i in range(len(src)+1):
2313
        d_mat[i, 0] = 0
2314
    for j in range(len(tar)+1):
2315
        d_mat[0, j] = 0
2316
    for i in range(1, len(src)+1):
2317
        for j in range(1, len(tar)+1):
2318
            match = d_mat[i-1, j-1] + sim_func(src[i-1], tar[j-1])
2319
            delete = d_mat[i-1, j] - gap_cost
2320
            insert = d_mat[i, j-1] - gap_cost
2321
            d_mat[i, j] = max(0, match, delete, insert)
2322
    return d_mat[d_mat.shape[0]-1, d_mat.shape[1]-1]
2323
2324
2325
def gotoh(src, tar, gap_open=1, gap_ext=0.4, sim_func=sim_ident):
@@ 2246-2282 (lines=37) @@
2243
    return mismatch_cost
2244
2245
2246
def needleman_wunsch(src, tar, gap_cost=1, sim_func=sim_ident):
2247
    """Return the Needleman-Wunsch score of two strings.
2248
2249
    The Needleman-Wunsch score :cite:`Needleman:1970` is a standard edit
2250
    distance measure.
2251
2252
    :param str src, tar: two strings to be compared
2253
    :param float gap_cost: the cost of an alignment gap (1 by default)
2254
    :param function sim_func: a function that returns the similarity of two
2255
        characters (identity similarity by default)
2256
    :returns: Needleman-Wunsch score
2257
    :rtype: int (in fact dependent on the gap_cost & return value of sim_func)
2258
2259
    >>> needleman_wunsch('cat', 'hat')
2260
    2.0
2261
    >>> needleman_wunsch('Niall', 'Neil')
2262
    1.0
2263
    >>> needleman_wunsch('aluminum', 'Catalan')
2264
    -1.0
2265
    >>> needleman_wunsch('ATCG', 'TAGC')
2266
    0.0
2267
    """
2268
    # pylint: disable=no-member
2269
    d_mat = np_zeros((len(src)+1, len(tar)+1), dtype=np_float32)
2270
    # pylint: enable=no-member
2271
2272
    for i in range(len(src)+1):
2273
        d_mat[i, 0] = -(i * gap_cost)
2274
    for j in range(len(tar)+1):
2275
        d_mat[0, j] = -(j * gap_cost)
2276
    for i in range(1, len(src)+1):
2277
        for j in range(1, len(tar)+1):
2278
            match = d_mat[i-1, j-1] + sim_func(src[i-1], tar[j-1])
2279
            delete = d_mat[i-1, j] - gap_cost
2280
            insert = d_mat[i, j-1] - gap_cost
2281
            d_mat[i, j] = max(match, delete, insert)
2282
    return d_mat[d_mat.shape[0]-1, d_mat.shape[1]-1]
2283
2284
2285
def smith_waterman(src, tar, gap_cost=1, sim_func=sim_ident):