Code Duplication    Length = 40-43 lines in 2 locations

abydos/distance.py 2 locations

@@ 2255-2297 (lines=43) @@
2252
    return mismatch_cost
2253
2254
2255
def needleman_wunsch(src, tar, gap_cost=1, sim_func=sim_ident):
    """Return the Needleman-Wunsch score of two strings.

    Needleman-Wunsch score

    This is a global alignment score: every aligned pair of characters
    contributes ``sim_func`` of that pair, every gap subtracts ``gap_cost``,
    and the best (maximum) total over all alignments of the full strings is
    returned.

    Cf. https://en.wikipedia.org/wiki/Needleman–Wunsch_algorithm

    Cf.
    http://csb.stanford.edu/class/public/readings/Bioinformatics_I_Lecture6/Needleman_Wunsch_JMB_70_Global_alignment.pdf

    :param str src, tar: two strings to be compared
    :param float gap_cost: the cost of an alignment gap (1 by default)
    :param function sim_func: a function that returns the similarity of two
        characters (identity similarity by default)
    :returns: Needleman-Wunsch score
    :rtype: float (a NumPy float64 read from the score matrix)

    >>> needleman_wunsch('cat', 'hat')
    2.0
    >>> needleman_wunsch('Niall', 'Neil')
    1.0
    >>> needleman_wunsch('aluminum', 'Catalan')
    -1.0
    >>> needleman_wunsch('ATCG', 'TAGC')
    0.0
    """
    src_len = len(src)
    tar_len = len(tar)

    # np.float64 is spelled out because the np.float alias was deprecated in
    # NumPy 1.20 and removed in 1.24; it is the same dtype the alias meant.
    # pylint: disable=no-member
    d_mat = np.zeros((src_len+1, tar_len+1), dtype=np.float64)
    # pylint: enable=no-member

    # Aligning a prefix against the empty string costs one gap per character.
    for i in range(src_len+1):
        d_mat[i, 0] = -(i * gap_cost)
    for j in range(tar_len+1):
        d_mat[0, j] = -(j * gap_cost)

    for i in range(1, src_len+1):
        for j in range(1, tar_len+1):
            # Best of: align src[i-1] with tar[j-1], or gap in either string.
            match = d_mat[i-1, j-1] + sim_func(src[i-1], tar[j-1])
            delete = d_mat[i-1, j] - gap_cost
            insert = d_mat[i, j-1] - gap_cost
            d_mat[i, j] = max(match, delete, insert)

    # The bottom-right cell holds the score for the two complete strings.
    return d_mat[src_len, tar_len]
2298
2299
2300
def smith_waterman(src, tar, gap_cost=1, sim_func=sim_ident):
@@ 2300-2339 (lines=40) @@
2297
    return d_mat[d_mat.shape[0]-1, d_mat.shape[1]-1]
2298
2299
2300
def smith_waterman(src, tar, gap_cost=1, sim_func=sim_ident):
    """Return the Smith-Waterman score of two strings.

    Smith-Waterman score

    This is an alignment score in which each aligned pair of characters
    contributes ``sim_func`` of that pair, each gap subtracts ``gap_cost``,
    and no cell of the score matrix is allowed to drop below 0.

    NOTE: the value returned is the final (bottom-right) cell of the score
    matrix, i.e. the score of the best alignment ending at the last
    character of both strings, not the maximum over the whole matrix.

    Cf. https://en.wikipedia.org/wiki/Smith–Waterman_algorithm

    :param str src, tar: two strings to be compared
    :param float gap_cost: the cost of an alignment gap (1 by default)
    :param function sim_func: a function that returns the similarity of two
        characters (identity similarity by default)
    :returns: Smith-Waterman score
    :rtype: float (a NumPy float64 read from the score matrix)

    >>> smith_waterman('cat', 'hat')
    2.0
    >>> smith_waterman('Niall', 'Neil')
    1.0
    >>> smith_waterman('aluminum', 'Catalan')
    0.0
    >>> smith_waterman('ATCG', 'TAGC')
    1.0
    """
    src_len = len(src)
    tar_len = len(tar)

    # np.float64 is spelled out because the np.float alias was deprecated in
    # NumPy 1.20 and removed in 1.24; it is the same dtype the alias meant.
    # np.zeros already leaves the first row and column at 0, which is the
    # boundary condition here, so no explicit initialisation is needed.
    # pylint: disable=no-member
    d_mat = np.zeros((src_len+1, tar_len+1), dtype=np.float64)
    # pylint: enable=no-member

    for i in range(1, src_len+1):
        for j in range(1, tar_len+1):
            match = d_mat[i-1, j-1] + sim_func(src[i-1], tar[j-1])
            delete = d_mat[i-1, j] - gap_cost
            insert = d_mat[i, j-1] - gap_cost
            # Flooring at 0 lets an alignment restart instead of going
            # negative.
            d_mat[i, j] = max(0, match, delete, insert)

    return d_mat[src_len, tar_len]
2340
2341
2342
def gotoh(src, tar, gap_open=1, gap_ext=0.4, sim_func=sim_ident):