Code Duplication - chrislit/abydos - Measure and Improve Code Quality continuously with Scrutinizer

Code Duplication Length = 37-38 lines in 2 locations

abydos/distance.py 2 locations


    return d_mat[d_mat.shape[0]-1, d_mat.shape[1]-1]


def smith_waterman(src, tar, gap_cost=1, sim_func=sim_ident):
    """Return the Smith-Waterman score of two strings.

    The Smith-Waterman score :cite:`Smith:1981` is a standard edit distance
    measure, differing from Needleman-Wunsch in that it focuses on local
    alignment and disallows negative scores.

    :param str src: source string for comparison
    :param str tar: target string for comparison
    :param float gap_cost: the cost of an alignment gap (1 by default)
    :param function sim_func: a function that returns the similarity of two
        characters (identity similarity by default)
    :returns: Smith-Waterman score
    :rtype: float

    >>> smith_waterman('cat', 'hat')
    2.0
    >>> smith_waterman('Niall', 'Neil')
    1.0
    >>> smith_waterman('aluminum', 'Catalan')
    0.0
    >>> smith_waterman('ATCG', 'TAGC')
    1.0
    """
    d_mat = np_zeros((len(src)+1, len(tar)+1), dtype=np_float32)

    for i in range(len(src)+1):
        d_mat[i, 0] = 0
    for j in range(len(tar)+1):
        d_mat[0, j] = 0
    for i in range(1, len(src)+1):
        for j in range(1, len(tar)+1):
            match = d_mat[i-1, j-1] + sim_func(src[i-1], tar[j-1])
            delete = d_mat[i-1, j] - gap_cost
            insert = d_mat[i, j-1] - gap_cost
            d_mat[i, j] = max(0, match, delete, insert)
    return d_mat[d_mat.shape[0]-1, d_mat.shape[1]-1]


def gotoh(src, tar, gap_open=1, gap_ext=0.4, sim_func=sim_ident):
    """Return the Gotoh score of two strings.

    return mismatch_cost


def needleman_wunsch(src, tar, gap_cost=1, sim_func=sim_ident):
    """Return the Needleman-Wunsch score of two strings.

    The Needleman-Wunsch score :cite:`Needleman:1970` is a standard edit
    distance measure.

    :param str src: source string for comparison
    :param str tar: target string for comparison
    :param float gap_cost: the cost of an alignment gap (1 by default)
    :param function sim_func: a function that returns the similarity of two
        characters (identity similarity by default)
    :returns: Needleman-Wunsch score
    :rtype: float

    >>> needleman_wunsch('cat', 'hat')
    2.0
    >>> needleman_wunsch('Niall', 'Neil')
    1.0
    >>> needleman_wunsch('aluminum', 'Catalan')
    -1.0
    >>> needleman_wunsch('ATCG', 'TAGC')
    0.0
    """
    d_mat = np_zeros((len(src)+1, len(tar)+1), dtype=np_float32)

    for i in range(len(src)+1):
        d_mat[i, 0] = -(i * gap_cost)
    for j in range(len(tar)+1):
        d_mat[0, j] = -(j * gap_cost)
    for i in range(1, len(src)+1):
        for j in range(1, len(tar)+1):
            match = d_mat[i-1, j-1] + sim_func(src[i-1], tar[j-1])
            delete = d_mat[i-1, j] - gap_cost
            insert = d_mat[i, j-1] - gap_cost
            d_mat[i, j] = max(match, delete, insert)
    return d_mat[d_mat.shape[0]-1, d_mat.shape[1]-1]


def smith_waterman(src, tar, gap_cost=1, sim_func=sim_ident):
    """Return the Smith-Waterman score of two strings.

		@@ 2409-2446 (lines=38) @@
2406		return d_mat[d_mat.shape[0]-1, d_mat.shape[1]-1]
2407
2408
2409		def smith_waterman(src, tar, gap_cost=1, sim_func=sim_ident):
2410		"""Return the Smith-Waterman score of two strings.
2411
2412		The Smith-Waterman score :cite:`Smith:1981` is a standard edit distance
2413		measure, differing from Needleman-Wunsch in that it focuses on local
2414		alignment and disallows negative scores.
2415
2416		:param str src: source string for comparison
2417		:param str tar: target string for comparison
2418		:param float gap_cost: the cost of an alignment gap (1 by default)
2419		:param function sim_func: a function that returns the similarity of two
2420		characters (identity similarity by default)
2421		:returns: Smith-Waterman score
2422		:rtype: float
2423
2424		>>> smith_waterman('cat', 'hat')
2425		2.0
2426		>>> smith_waterman('Niall', 'Neil')
2427		1.0
2428		>>> smith_waterman('aluminum', 'Catalan')
2429		0.0
2430		>>> smith_waterman('ATCG', 'TAGC')
2431		1.0
2432		"""
2433		d_mat = np_zeros((len(src)+1, len(tar)+1), dtype=np_float32)
2434
2435		for i in range(len(src)+1):
2436		d_mat[i, 0] = 0
2437		for j in range(len(tar)+1):
2438		d_mat[0, j] = 0
2439		for i in range(1, len(src)+1):
2440		for j in range(1, len(tar)+1):
2441		match = d_mat[i-1, j-1] + sim_func(src[i-1], tar[j-1])
2442		delete = d_mat[i-1, j] - gap_cost
2443		insert = d_mat[i, j-1] - gap_cost
2444		d_mat[i, j] = max(0, match, delete, insert)
2445		return d_mat[d_mat.shape[0]-1, d_mat.shape[1]-1]
2446
2447
2448		def gotoh(src, tar, gap_open=1, gap_ext=0.4, sim_func=sim_ident):
2449		"""Return the Gotoh score of two strings.
		@@ 2371-2407 (lines=37) @@
2368		return mismatch_cost
2369
2370
2371		def needleman_wunsch(src, tar, gap_cost=1, sim_func=sim_ident):
2372		"""Return the Needleman-Wunsch score of two strings.
2373
2374		The Needleman-Wunsch score :cite:`Needleman:1970` is a standard edit
2375		distance measure.
2376
2377		:param str src: source string for comparison
2378		:param str tar: target string for comparison
2379		:param float gap_cost: the cost of an alignment gap (1 by default)
2380		:param function sim_func: a function that returns the similarity of two
2381		characters (identity similarity by default)
2382		:returns: Needleman-Wunsch score
2383		:rtype: float
2384
2385		>>> needleman_wunsch('cat', 'hat')
2386		2.0
2387		>>> needleman_wunsch('Niall', 'Neil')
2388		1.0
2389		>>> needleman_wunsch('aluminum', 'Catalan')
2390		-1.0
2391		>>> needleman_wunsch('ATCG', 'TAGC')
2392		0.0
2393		"""
2394		d_mat = np_zeros((len(src)+1, len(tar)+1), dtype=np_float32)
2395
2396		for i in range(len(src)+1):
2397		d_mat[i, 0] = -(i * gap_cost)
2398		for j in range(len(tar)+1):
2399		d_mat[0, j] = -(j * gap_cost)
2400		for i in range(1, len(src)+1):
2401		for j in range(1, len(tar)+1):
2402		match = d_mat[i-1, j-1] + sim_func(src[i-1], tar[j-1])
2403		delete = d_mat[i-1, j] - gap_cost
2404		insert = d_mat[i, j-1] - gap_cost
2405		d_mat[i, j] = max(match, delete, insert)
2406		return d_mat[d_mat.shape[0]-1, d_mat.shape[1]-1]
2407
2408
2409		def smith_waterman(src, tar, gap_cost=1, sim_func=sim_ident):
2410		"""Return the Smith-Waterman score of two strings.

chrislit / abydos

Code Duplication Length = 37-38 lines in 2 locations

abydos/distance.py 2 locations