Code Duplication    Length = 37-38 lines in 2 locations

abydos/distance.py 2 locations

@@ 2313-2350 (lines=38) @@
2310
    return d_mat[d_mat.shape[0]-1, d_mat.shape[1]-1]
2311
2312
2313
def smith_waterman(src, tar, gap_cost=1, sim_func=sim_ident):
2314
    """Return the Smith-Waterman score of two strings.
2315
2316
    The Smith-Waterman score :cite:`Smith:1981` is a standard edit distance
2317
    measure, differing from Needleman-Wunsch in that it focuses on local
2318
    alignment and disallows negative scores.
2319
2320
    :param str src, tar: two strings to be compared
2321
    :param float gap_cost: the cost of an alignment gap (1 by default)
2322
    :param function sim_func: a function that returns the similarity of two
2323
        characters (identity similarity by default)
2324
    :returns: Smith-Waterman score
2325
    :rtype: int (in fact dependent on the gap_cost & return value of sim_func)
2326
2327
    >>> smith_waterman('cat', 'hat')
2328
    2.0
2329
    >>> smith_waterman('Niall', 'Neil')
2330
    1.0
2331
    >>> smith_waterman('aluminum', 'Catalan')
2332
    0.0
2333
    >>> smith_waterman('ATCG', 'TAGC')
2334
    1.0
2335
    """
2336
    d_mat = np_zeros((len(src)+1, len(tar)+1), dtype=np_float32)
2337
2338
    for i in range(len(src)+1):
2339
        d_mat[i, 0] = 0
2340
    for j in range(len(tar)+1):
2341
        d_mat[0, j] = 0
2342
    for i in range(1, len(src)+1):
2343
        for j in range(1, len(tar)+1):
2344
            match = d_mat[i-1, j-1] + sim_func(src[i-1], tar[j-1])
2345
            delete = d_mat[i-1, j] - gap_cost
2346
            insert = d_mat[i, j-1] - gap_cost
2347
            d_mat[i, j] = max(0, match, delete, insert)
2348
    return d_mat[d_mat.shape[0]-1, d_mat.shape[1]-1]
2349
2350
2351
def gotoh(src, tar, gap_open=1, gap_ext=0.4, sim_func=sim_ident):
2352
    """Return the Gotoh score of two strings.
2353
@@ 2276-2312 (lines=37) @@
2273
    return mismatch_cost
2274
2275
2276
def needleman_wunsch(src, tar, gap_cost=1, sim_func=sim_ident):
2277
    """Return the Needleman-Wunsch score of two strings.
2278
2279
    The Needleman-Wunsch score :cite:`Needleman:1970` is a standard edit
2280
    distance measure.
2281
2282
    :param str src, tar: two strings to be compared
2283
    :param float gap_cost: the cost of an alignment gap (1 by default)
2284
    :param function sim_func: a function that returns the similarity of two
2285
        characters (identity similarity by default)
2286
    :returns: Needleman-Wunsch score
2287
    :rtype: int (in fact dependent on the gap_cost & return value of sim_func)
2288
2289
    >>> needleman_wunsch('cat', 'hat')
2290
    2.0
2291
    >>> needleman_wunsch('Niall', 'Neil')
2292
    1.0
2293
    >>> needleman_wunsch('aluminum', 'Catalan')
2294
    -1.0
2295
    >>> needleman_wunsch('ATCG', 'TAGC')
2296
    0.0
2297
    """
2298
    d_mat = np_zeros((len(src)+1, len(tar)+1), dtype=np_float32)
2299
2300
    for i in range(len(src)+1):
2301
        d_mat[i, 0] = -(i * gap_cost)
2302
    for j in range(len(tar)+1):
2303
        d_mat[0, j] = -(j * gap_cost)
2304
    for i in range(1, len(src)+1):
2305
        for j in range(1, len(tar)+1):
2306
            match = d_mat[i-1, j-1] + sim_func(src[i-1], tar[j-1])
2307
            delete = d_mat[i-1, j] - gap_cost
2308
            insert = d_mat[i, j-1] - gap_cost
2309
            d_mat[i, j] = max(match, delete, insert)
2310
    return d_mat[d_mat.shape[0]-1, d_mat.shape[1]-1]
2311
2312
2313
def smith_waterman(src, tar, gap_cost=1, sim_func=sim_ident):
2314
    """Return the Smith-Waterman score of two strings.
2315