Code Duplication    Length = 40-43 lines in 2 locations

abydos/distance.py 2 locations

@@ 2253-2295 (lines=43) @@
2250
    return mismatch_cost
2251
2252
2253
def needleman_wunsch(src, tar, gap_cost=1, sim_func=sim_ident):
2254
    """Return the Needleman-Wunsch score of two strings.
2255
2256
    Needleman-Wunsch score
2257
2258
    This is the standard edit distance measure.
2259
2260
    Cf. https://en.wikipedia.org/wiki/Needleman–Wunsch_algorithm
2261
2262
    Cf.
2263
    http://csb.stanford.edu/class/public/readings/Bioinformatics_I_Lecture6/Needleman_Wunsch_JMB_70_Global_alignment.pdf
2264
2265
    :param str src, tar: two strings to be compared
2266
    :param float gap_cost: the cost of an alignment gap (1 by default)
2267
    :param function sim_func: a function that returns the similarity of two
2268
        characters (identity similarity by default)
2269
    :returns: Needleman-Wunsch score
2270
    :rtype: int (in fact dependent on the gap_cost & return value of sim_func)
2271
2272
    >>> needleman_wunsch('cat', 'hat')
2273
    2.0
2274
    >>> needleman_wunsch('Niall', 'Neil')
2275
    1.0
2276
    >>> needleman_wunsch('aluminum', 'Catalan')
2277
    -1.0
2278
    >>> needleman_wunsch('ATCG', 'TAGC')
2279
    0.0
2280
    """
2281
    # pylint: disable=no-member
2282
    d_mat = np.zeros((len(src)+1, len(tar)+1), dtype=np.float)
2283
    # pylint: enable=no-member
2284
2285
    for i in range(len(src)+1):
2286
        d_mat[i, 0] = -(i * gap_cost)
2287
    for j in range(len(tar)+1):
2288
        d_mat[0, j] = -(j * gap_cost)
2289
    for i in range(1, len(src)+1):
2290
        for j in range(1, len(tar)+1):
2291
            match = d_mat[i-1, j-1] + sim_func(src[i-1], tar[j-1])
2292
            delete = d_mat[i-1, j] - gap_cost
2293
            insert = d_mat[i, j-1] - gap_cost
2294
            d_mat[i, j] = max(match, delete, insert)
2295
    return d_mat[d_mat.shape[0]-1, d_mat.shape[1]-1]
2296
2297
2298
def smith_waterman(src, tar, gap_cost=1, sim_func=sim_ident):
@@ 2298-2337 (lines=40) @@
2295
    return d_mat[d_mat.shape[0]-1, d_mat.shape[1]-1]
2296
2297
2298
def smith_waterman(src, tar, gap_cost=1, sim_func=sim_ident):
2299
    """Return the Smith-Waterman score of two strings.
2300
2301
    Smith-Waterman score
2302
2303
    This is the standard edit distance measure.
2304
2305
    Cf. https://en.wikipedia.org/wiki/Smith–Waterman_algorithm
2306
2307
    :param str src, tar: two strings to be compared
2308
    :param float gap_cost: the cost of an alignment gap (1 by default)
2309
    :param function sim_func: a function that returns the similarity of two
2310
        characters (identity similarity by default)
2311
    :returns: Smith-Waterman score
2312
    :rtype: int (in fact dependent on the gap_cost & return value of sim_func)
2313
2314
    >>> smith_waterman('cat', 'hat')
2315
    2.0
2316
    >>> smith_waterman('Niall', 'Neil')
2317
    1.0
2318
    >>> smith_waterman('aluminum', 'Catalan')
2319
    0.0
2320
    >>> smith_waterman('ATCG', 'TAGC')
2321
    1.0
2322
    """
2323
    # pylint: disable=no-member
2324
    d_mat = np.zeros((len(src)+1, len(tar)+1), dtype=np.float)
2325
    # pylint: enable=no-member
2326
2327
    for i in range(len(src)+1):
2328
        d_mat[i, 0] = 0
2329
    for j in range(len(tar)+1):
2330
        d_mat[0, j] = 0
2331
    for i in range(1, len(src)+1):
2332
        for j in range(1, len(tar)+1):
2333
            match = d_mat[i-1, j-1] + sim_func(src[i-1], tar[j-1])
2334
            delete = d_mat[i-1, j] - gap_cost
2335
            insert = d_mat[i, j-1] - gap_cost
2336
            d_mat[i, j] = max(0, match, delete, insert)
2337
    return d_mat[d_mat.shape[0]-1, d_mat.shape[1]-1]
2338
2339
2340
def gotoh(src, tar, gap_open=1, gap_ext=0.4, sim_func=sim_ident):