Code Duplication: 42-43 duplicated lines found in 2 locations

abydos/distance/_overlap.py 1 location

@@ 36-78 (lines=43) @@
33
__all__ = ['Overlap', 'dist_overlap', 'sim_overlap']
34
35
36
class Overlap(_TokenDistance):
37
    r"""Overlap coefficient.
38
39
    For two sets X and Y, the overlap coefficient
40
    :cite:`Szymkiewicz:1934,Simpson:1949`, also called the
41
    Szymkiewicz-Simpson coefficient, is
42
    :math:`sim_{overlap}(X, Y) = \frac{|X \cap Y|}{\min(|X|, |Y|)}`.
43
    """
44
45
    def sim(self, src, tar, qval=2):
46
        r"""Return the overlap coefficient of two strings.
47
48
        Parameters
49
        ----------
50
        src : str
51
            Source string (or QGrams/Counter objects) for comparison
52
        tar : str
53
            Target string (or QGrams/Counter objects) for comparison
54
        qval : int
55
            The length of each q-gram; 0 for non-q-gram version
56
57
        Returns
58
        -------
59
        float
60
            Overlap similarity
61
62
        Examples
63
        --------
64
        >>> cmp = Overlap()
65
        >>> cmp.sim('cat', 'hat')
66
        0.5
67
        >>> cmp.sim('Niall', 'Neil')
68
        0.4
69
        >>> cmp.sim('aluminum', 'Catalan')
70
        0.125
71
        >>> cmp.sim('ATCG', 'TAGC')
72
        0.0
73
74
        """
75
        if src == tar:
76
            return 1.0
77
        elif not src or not tar:
78
            return 0.0
79
80
        q_src, q_tar = self._get_qgrams(src, tar, qval)
81
        q_src_mag = sum(q_src.values())

abydos/distance/_cosine.py 1 location

@@ 38-79 (lines=42) @@
35
__all__ = ['Cosine', 'dist_cosine', 'sim_cosine']
36
37
38
class Cosine(_TokenDistance):
39
    r"""Cosine similarity.
40
41
    For two sets X and Y, the cosine similarity, Otsuka-Ochiai coefficient, or
42
    Ochiai coefficient :cite:`Otsuka:1936,Ochiai:1957` is:
43
    :math:`sim_{cosine}(X, Y) = \frac{|X \cap Y|}{\sqrt{|X| \cdot |Y|}}`.
44
    """
45
46
    def sim(self, src, tar, qval=2):
47
        r"""Return the cosine similarity of two strings.
48
49
        Parameters
50
        ----------
51
        src : str
52
            Source string (or QGrams/Counter objects) for comparison
53
        tar : str
54
            Target string (or QGrams/Counter objects) for comparison
55
        qval : int
56
            The length of each q-gram; 0 for non-q-gram version
57
58
        Returns
59
        -------
60
        float
61
            Cosine similarity
62
63
        Examples
64
        --------
65
        >>> cmp = Cosine()
66
        >>> cmp.sim('cat', 'hat')
67
        0.5
68
        >>> cmp.sim('Niall', 'Neil')
69
        0.3651483716701107
70
        >>> cmp.sim('aluminum', 'Catalan')
71
        0.11785113019775793
72
        >>> cmp.sim('ATCG', 'TAGC')
73
        0.0
74
75
        """
76
        if src == tar:
77
            return 1.0
78
        if not src or not tar:
79
            return 0.0
80
81
        q_src, q_tar = self._get_qgrams(src, tar, qval)
82
        q_src_mag = sum(q_src.values())