Code Duplication    Length = 40-43 lines in 2 locations

abydos/distance.py 2 locations

@@ 2259-2301 (lines=43) @@
2256
    return mismatch_cost
2257
2258
2259
def needleman_wunsch(src, tar, gap_cost=1, sim_func=sim_ident):
2260
    """Return the Needleman-Wunsch score of two strings.
2261
2262
    Needleman-Wunsch score
2263
2264
    This is the standard edit distance measure.
2265
2266
    Cf. https://en.wikipedia.org/wiki/Needleman–Wunsch_algorithm
2267
2268
    Cf.
2269
    http://csb.stanford.edu/class/public/readings/Bioinformatics_I_Lecture6/Needleman_Wunsch_JMB_70_Global_alignment.pdf
2270
2271
    :param str src, tar: two strings to be compared
2272
    :param float gap_cost: the cost of an alignment gap (1 by default)
2273
    :param function sim_func: a function that returns the similarity of two
2274
        characters (identity similarity by default)
2275
    :returns: Needleman-Wunsch score
2276
    :rtype: int (in fact dependent on the gap_cost & return value of sim_func)
2277
2278
    >>> needleman_wunsch('cat', 'hat')
2279
    2.0
2280
    >>> needleman_wunsch('Niall', 'Neil')
2281
    1.0
2282
    >>> needleman_wunsch('aluminum', 'Catalan')
2283
    -1.0
2284
    >>> needleman_wunsch('ATCG', 'TAGC')
2285
    0.0
2286
    """
2287
    # pylint: disable=no-member
2288
    d_mat = np.zeros((len(src)+1, len(tar)+1), dtype=np.float)
2289
    # pylint: enable=no-member
2290
2291
    for i in range(len(src)+1):
2292
        d_mat[i, 0] = -(i * gap_cost)
2293
    for j in range(len(tar)+1):
2294
        d_mat[0, j] = -(j * gap_cost)
2295
    for i in range(1, len(src)+1):
2296
        for j in range(1, len(tar)+1):
2297
            match = d_mat[i-1, j-1] + sim_func(src[i-1], tar[j-1])
2298
            delete = d_mat[i-1, j] - gap_cost
2299
            insert = d_mat[i, j-1] - gap_cost
2300
            d_mat[i, j] = max(match, delete, insert)
2301
    return d_mat[d_mat.shape[0]-1, d_mat.shape[1]-1]
2302
2303
2304
def smith_waterman(src, tar, gap_cost=1, sim_func=sim_ident):
@@ 2304-2343 (lines=40) @@
2301
    return d_mat[d_mat.shape[0]-1, d_mat.shape[1]-1]
2302
2303
2304
def smith_waterman(src, tar, gap_cost=1, sim_func=sim_ident):
2305
    """Return the Smith-Waterman score of two strings.
2306
2307
    Smith-Waterman score
2308
2309
    This is the standard edit distance measure.
2310
2311
    Cf. https://en.wikipedia.org/wiki/Smith–Waterman_algorithm
2312
2313
    :param str src, tar: two strings to be compared
2314
    :param float gap_cost: the cost of an alignment gap (1 by default)
2315
    :param function sim_func: a function that returns the similarity of two
2316
        characters (identity similarity by default)
2317
    :returns: Smith-Waterman score
2318
    :rtype: int (in fact dependent on the gap_cost & return value of sim_func)
2319
2320
    >>> smith_waterman('cat', 'hat')
2321
    2.0
2322
    >>> smith_waterman('Niall', 'Neil')
2323
    1.0
2324
    >>> smith_waterman('aluminum', 'Catalan')
2325
    0.0
2326
    >>> smith_waterman('ATCG', 'TAGC')
2327
    1.0
2328
    """
2329
    # pylint: disable=no-member
2330
    d_mat = np.zeros((len(src)+1, len(tar)+1), dtype=np.float)
2331
    # pylint: enable=no-member
2332
2333
    for i in range(len(src)+1):
2334
        d_mat[i, 0] = 0
2335
    for j in range(len(tar)+1):
2336
        d_mat[0, j] = 0
2337
    for i in range(1, len(src)+1):
2338
        for j in range(1, len(tar)+1):
2339
            match = d_mat[i-1, j-1] + sim_func(src[i-1], tar[j-1])
2340
            delete = d_mat[i-1, j] - gap_cost
2341
            insert = d_mat[i, j-1] - gap_cost
2342
            d_mat[i, j] = max(0, match, delete, insert)
2343
    return d_mat[d_mat.shape[0]-1, d_mat.shape[1]-1]
2344
2345
2346
def gotoh(src, tar, gap_open=1, gap_ext=0.4, sim_func=sim_ident):