Code Duplication    Length = 37-38 lines in 2 locations

abydos/distance.py 2 locations

@@ 2284-2321 (lines=38) @@
2281
    return d_mat[d_mat.shape[0]-1, d_mat.shape[1]-1]
2282
2283
2284
def smith_waterman(src, tar, gap_cost=1, sim_func=sim_ident):
    """Return the Smith-Waterman score of two strings.

    The Smith-Waterman score :cite:`Smith:1981` is a standard edit distance
    measure, differing from Needleman-Wunsch in that it focuses on local
    alignment and disallows negative scores.

    Note that the value returned is the final (bottom-right) cell of the
    score matrix, not the maximum over all of its cells.

    :param str src, tar: two strings to be compared
    :param float gap_cost: the cost of an alignment gap (1 by default)
    :param function sim_func: a function that returns the similarity of two
        characters (identity similarity by default)
    :returns: Smith-Waterman score
    :rtype: float (an element of the ``np_float32`` score matrix)

    >>> smith_waterman('cat', 'hat')
    2.0
    >>> smith_waterman('Niall', 'Neil')
    1.0
    >>> smith_waterman('aluminum', 'Catalan')
    0.0
    >>> smith_waterman('ATCG', 'TAGC')
    1.0
    """
    # The first row and column must be all zeros (an alignment may start
    # anywhere at no cost); the zero-filled matrix already provides that,
    # so no explicit border initialisation is needed.
    # pylint: disable=no-member
    d_mat = np_zeros((len(src)+1, len(tar)+1), dtype=np_float32)
    # pylint: enable=no-member

    for i, src_char in enumerate(src, 1):
        for j, tar_char in enumerate(tar, 1):
            match = d_mat[i-1, j-1] + sim_func(src_char, tar_char)
            delete = d_mat[i-1, j] - gap_cost
            insert = d_mat[i, j-1] - gap_cost
            # Clamping at 0 is what makes the alignment local: a negative
            # running score is discarded and the alignment restarts here.
            d_mat[i, j] = max(0, match, delete, insert)
    return d_mat[-1, -1]
2322
2323
2324
def gotoh(src, tar, gap_open=1, gap_ext=0.4, sim_func=sim_ident):
@@ 2245-2281 (lines=37) @@
2242
    return mismatch_cost
2243
2244
2245
def needleman_wunsch(src, tar, gap_cost=1, sim_func=sim_ident):
    """Return the Needleman-Wunsch score of two strings.

    The Needleman-Wunsch score :cite:`Needleman:1970` is a standard edit
    distance measure.

    :param str src, tar: two strings to be compared
    :param float gap_cost: the cost of an alignment gap (1 by default)
    :param function sim_func: a function that returns the similarity of two
        characters (identity similarity by default)
    :returns: Needleman-Wunsch score
    :rtype: float (an element of the ``np_float32`` score matrix)

    >>> needleman_wunsch('cat', 'hat')
    2.0
    >>> needleman_wunsch('Niall', 'Neil')
    1.0
    >>> needleman_wunsch('aluminum', 'Catalan')
    -1.0
    >>> needleman_wunsch('ATCG', 'TAGC')
    0.0
    """
    # pylint: disable=no-member
    d_mat = np_zeros((len(src)+1, len(tar)+1), dtype=np_float32)
    # pylint: enable=no-member

    # Border cells: aligning a prefix against the empty string costs one
    # gap per character of that prefix.
    for i in range(len(src)+1):
        d_mat[i, 0] = -(i * gap_cost)
    for j in range(len(tar)+1):
        d_mat[0, j] = -(j * gap_cost)

    for i, src_char in enumerate(src, 1):
        for j, tar_char in enumerate(tar, 1):
            match = d_mat[i-1, j-1] + sim_func(src_char, tar_char)
            delete = d_mat[i-1, j] - gap_cost
            insert = d_mat[i, j-1] - gap_cost
            d_mat[i, j] = max(match, delete, insert)
    # The global alignment score is the last cell of the matrix.
    return d_mat[-1, -1]
2282
2283
2284
def smith_waterman(src, tar, gap_cost=1, sim_func=sim_ident):