Code Duplication    Length = 37-38 lines in 2 locations

abydos/distance.py 2 locations

@@ 2409-2446 (lines=38) @@
2406
    return d_mat[d_mat.shape[0]-1, d_mat.shape[1]-1]
2407
2408
2409
def smith_waterman(src, tar, gap_cost=1, sim_func=sim_ident):
2410
    """Return the Smith-Waterman score of two strings.
2411
2412
    The Smith-Waterman score :cite:`Smith:1981` is a standard edit distance
2413
    measure, differing from Needleman-Wunsch in that it focuses on local
2414
    alignment and disallows negative scores.
2415
2416
    :param str src: source string for comparison
2417
    :param str tar: target string for comparison
2418
    :param float gap_cost: the cost of an alignment gap (1 by default)
2419
    :param function sim_func: a function that returns the similarity of two
2420
        characters (identity similarity by default)
2421
    :returns: Smith-Waterman score
2422
    :rtype: float
2423
2424
    >>> smith_waterman('cat', 'hat')
2425
    2.0
2426
    >>> smith_waterman('Niall', 'Neil')
2427
    1.0
2428
    >>> smith_waterman('aluminum', 'Catalan')
2429
    0.0
2430
    >>> smith_waterman('ATCG', 'TAGC')
2431
    1.0
2432
    """
2433
    d_mat = np_zeros((len(src)+1, len(tar)+1), dtype=np_float32)
2434
2435
    for i in range(len(src)+1):
2436
        d_mat[i, 0] = 0
2437
    for j in range(len(tar)+1):
2438
        d_mat[0, j] = 0
2439
    for i in range(1, len(src)+1):
2440
        for j in range(1, len(tar)+1):
2441
            match = d_mat[i-1, j-1] + sim_func(src[i-1], tar[j-1])
2442
            delete = d_mat[i-1, j] - gap_cost
2443
            insert = d_mat[i, j-1] - gap_cost
2444
            d_mat[i, j] = max(0, match, delete, insert)
2445
    return d_mat[d_mat.shape[0]-1, d_mat.shape[1]-1]
2446
2447
2448
def gotoh(src, tar, gap_open=1, gap_ext=0.4, sim_func=sim_ident):
2449
    """Return the Gotoh score of two strings.
@@ 2371-2407 (lines=37) @@
2368
    return mismatch_cost
2369
2370
2371
def needleman_wunsch(src, tar, gap_cost=1, sim_func=sim_ident):
2372
    """Return the Needleman-Wunsch score of two strings.
2373
2374
    The Needleman-Wunsch score :cite:`Needleman:1970` is a standard edit
2375
    distance measure.
2376
2377
    :param str src: source string for comparison
2378
    :param str tar: target string for comparison
2379
    :param float gap_cost: the cost of an alignment gap (1 by default)
2380
    :param function sim_func: a function that returns the similarity of two
2381
        characters (identity similarity by default)
2382
    :returns: Needleman-Wunsch score
2383
    :rtype: float
2384
2385
    >>> needleman_wunsch('cat', 'hat')
2386
    2.0
2387
    >>> needleman_wunsch('Niall', 'Neil')
2388
    1.0
2389
    >>> needleman_wunsch('aluminum', 'Catalan')
2390
    -1.0
2391
    >>> needleman_wunsch('ATCG', 'TAGC')
2392
    0.0
2393
    """
2394
    d_mat = np_zeros((len(src)+1, len(tar)+1), dtype=np_float32)
2395
2396
    for i in range(len(src)+1):
2397
        d_mat[i, 0] = -(i * gap_cost)
2398
    for j in range(len(tar)+1):
2399
        d_mat[0, j] = -(j * gap_cost)
2400
    for i in range(1, len(src)+1):
2401
        for j in range(1, len(tar)+1):
2402
            match = d_mat[i-1, j-1] + sim_func(src[i-1], tar[j-1])
2403
            delete = d_mat[i-1, j] - gap_cost
2404
            insert = d_mat[i, j-1] - gap_cost
2405
            d_mat[i, j] = max(match, delete, insert)
2406
    return d_mat[d_mat.shape[0]-1, d_mat.shape[1]-1]
2407
2408
2409
def smith_waterman(src, tar, gap_cost=1, sim_func=sim_ident):
2410
    """Return the Smith-Waterman score of two strings.