Code Duplication    Length = 40-43 lines in 2 locations

abydos/distance.py 2 locations

@@ 2254-2296 (lines=43) @@
2251
    return mismatch_cost
2252
2253
2254
def needleman_wunsch(src, tar, gap_cost=1, sim_func=sim_ident):
    """Return the Needleman-Wunsch score of two strings.

    Needleman-Wunsch score

    This is a global sequence alignment score: both strings are aligned
    end-to-end, each aligned character pair contributes
    ``sim_func(src_char, tar_char)`` and each gap subtracts ``gap_cost``.
    Higher values indicate more similar strings, and the score may be
    negative.

    Cf. https://en.wikipedia.org/wiki/Needleman–Wunsch_algorithm

    Cf.
    http://csb.stanford.edu/class/public/readings/Bioinformatics_I_Lecture6/Needleman_Wunsch_JMB_70_Global_alignment.pdf

    :param str src, tar: two strings to be compared
    :param float gap_cost: the cost of an alignment gap (1 by default)
    :param function sim_func: a function that returns the similarity of two
        characters (identity similarity by default)
    :returns: Needleman-Wunsch score
    :rtype: float (the scoring matrix is stored as 64-bit floats)

    >>> needleman_wunsch('cat', 'hat')
    2.0
    >>> needleman_wunsch('Niall', 'Neil')
    1.0
    >>> needleman_wunsch('aluminum', 'Catalan')
    -1.0
    >>> needleman_wunsch('ATCG', 'TAGC')
    0.0
    """
    src_len = len(src)
    tar_len = len(tar)

    # np.float64 replaces the np.float alias, which was deprecated in
    # NumPy 1.20 and removed in NumPy 1.24; the resulting dtype is the same.
    # pylint: disable=no-member
    d_mat = np.zeros((src_len + 1, tar_len + 1), dtype=np.float64)
    # pylint: enable=no-member

    # Aligning a prefix against the empty string costs one gap per character.
    for i in range(src_len + 1):
        d_mat[i, 0] = -(i * gap_cost)
    for j in range(tar_len + 1):
        d_mat[0, j] = -(j * gap_cost)

    for i in range(1, src_len + 1):
        for j in range(1, tar_len + 1):
            # Best of: aligning the two current characters, or opening a gap
            # in either string.
            match = d_mat[i - 1, j - 1] + sim_func(src[i - 1], tar[j - 1])
            delete = d_mat[i - 1, j] - gap_cost
            insert = d_mat[i, j - 1] - gap_cost
            d_mat[i, j] = max(match, delete, insert)

    # The bottom-right cell holds the score of aligning all of src with
    # all of tar.
    return d_mat[src_len, tar_len]
2297
2298
2299
def smith_waterman(src, tar, gap_cost=1, sim_func=sim_ident):
@@ 2299-2338 (lines=40) @@
2296
    return d_mat[d_mat.shape[0]-1, d_mat.shape[1]-1]
2297
2298
2299
def smith_waterman(src, tar, gap_cost=1, sim_func=sim_ident):
    """Return the Smith-Waterman score of two strings.

    Smith-Waterman score

    This is a local sequence alignment score: each aligned character pair
    contributes ``sim_func(src_char, tar_char)``, each gap subtracts
    ``gap_cost``, and no cell of the scoring matrix is allowed to drop
    below 0.

    Note that the value returned is the bottom-right cell of the scoring
    matrix, not the maximum over the whole matrix as in the textbook
    formulation of the algorithm. This behaviour is kept unchanged for
    backward compatibility.

    Cf. https://en.wikipedia.org/wiki/Smith–Waterman_algorithm

    :param str src, tar: two strings to be compared
    :param float gap_cost: the cost of an alignment gap (1 by default)
    :param function sim_func: a function that returns the similarity of two
        characters (identity similarity by default)
    :returns: Smith-Waterman score
    :rtype: float (the scoring matrix is stored as 64-bit floats)

    >>> smith_waterman('cat', 'hat')
    2.0
    >>> smith_waterman('Niall', 'Neil')
    1.0
    >>> smith_waterman('aluminum', 'Catalan')
    0.0
    >>> smith_waterman('ATCG', 'TAGC')
    1.0
    """
    src_len = len(src)
    tar_len = len(tar)

    # np.float64 replaces the np.float alias, which was deprecated in
    # NumPy 1.20 and removed in NumPy 1.24; the resulting dtype is the same.
    # The first row and column must be 0, which np.zeros already guarantees,
    # so no explicit initialisation loops are needed.
    # pylint: disable=no-member
    d_mat = np.zeros((src_len + 1, tar_len + 1), dtype=np.float64)
    # pylint: enable=no-member

    for i in range(1, src_len + 1):
        for j in range(1, tar_len + 1):
            # Best of: aligning the two current characters, or opening a gap
            # in either string; the score is floored at 0.
            match = d_mat[i - 1, j - 1] + sim_func(src[i - 1], tar[j - 1])
            delete = d_mat[i - 1, j] - gap_cost
            insert = d_mat[i, j - 1] - gap_cost
            d_mat[i, j] = max(0, match, delete, insert)

    return d_mat[src_len, tar_len]
2339
2340
2341
def gotoh(src, tar, gap_open=1, gap_ext=0.4, sim_func=sim_ident):