Code Duplication    Length = 57-61 lines in 2 locations

abydos/distance/_ncd_lzss.py 1 location

@@ 34-94 (lines=61) @@
31
# Public names exported by a wildcard import of this module.
__all__ = ['NCDlzss']
32
33
34
class NCDlzss(_Distance):
    """Normalized Compression Distance (NCD) computed with LZSS.

    The compressor is Lempel-Ziv-Storer-Szymanski; cf.
    https://en.wikipedia.org/wiki/Lempel-Ziv-Storer-Szymanski

    NCD itself is described in :cite:`Cilibrasi:2005`.

    .. versionadded:: 0.4.0
    """

    def dist(self, src: str, tar: str) -> float:
        """Return the LZSS-based NCD between two strings.

        The result is the smaller compressed length of the two
        concatenation orders, minus the smaller compressed length of the
        individual strings, divided by the larger compressed length of the
        individual strings.

        Parameters
        ----------
        src : str
            Source string for comparison
        tar : str
            Target string for comparison

        Returns
        -------
        float
            Compression distance; 0.0 when the two strings are equal

        Raises
        ------
        ValueError
            Install the PyLZSS module in order to use LZSS (raised when
            the module-level name ``lzss`` is ``None`` and the strings
            differ)

        Examples
        --------
        >>> cmp = NCDlzss()
        >>> cmp.dist('cat', 'hat')
        0.75
        >>> cmp.dist('Niall', 'Neil')
        1.0
        >>> cmp.dist('aluminum', 'Catalan')
        1.0
        >>> cmp.dist('ATCG', 'TAGC')
        0.8


        .. versionadded:: 0.4.0

        """
        # Equal inputs short-circuit before the compressor is consulted.
        if src == tar:
            return 0.0

        if lzss is None:  # pragma: no cover
            raise ValueError('Install the PyLZSS module in order to use LZSS')

        # Only the compressed lengths matter for the distance.
        len_src = len(lzss.encode(src))
        len_tar = len(lzss.encode(tar))
        len_src_tar = len(lzss.encode(src + tar))
        len_tar_src = len(lzss.encode(tar + src))

        # Both concatenation orders are tried and the shorter one is used.
        numerator = min(len_src_tar, len_tar_src) - min(len_src, len_tar)
        return numerator / max(len_src, len_tar)
95
96
97
if __name__ == '__main__':

abydos/distance/_ncd_rle.py 1 location

@@ 28-84 (lines=57) @@
25
# Public names exported by a wildcard import of this module.
__all__ = ['NCDrle']
26
27
28
class NCDrle(_Distance):
    """Normalized Compression Distance (NCD) computed with RLE.

    The compressor is run-length encoding; cf.
    https://en.wikipedia.org/wiki/Run-length_encoding

    NCD itself is described in :cite:`Cilibrasi:2005`.

    .. versionadded:: 0.3.6
    """

    # Encoder created once at class definition and shared via the class.
    _rle = RLE()

    def dist(self, src: str, tar: str) -> float:
        """Return the RLE-based NCD between two strings.

        The result is the smaller encoded length of the two concatenation
        orders, minus the smaller encoded length of the individual strings,
        divided by the larger encoded length of the individual strings.

        Parameters
        ----------
        src : str
            Source string for comparison
        tar : str
            Target string for comparison

        Returns
        -------
        float
            Compression distance; 0.0 when the two strings are equal

        Examples
        --------
        >>> cmp = NCDrle()
        >>> cmp.dist('cat', 'hat')
        1.0
        >>> cmp.dist('Niall', 'Neil')
        1.0
        >>> cmp.dist('aluminum', 'Catalan')
        1.0
        >>> cmp.dist('ATCG', 'TAGC')
        1.0


        .. versionadded:: 0.3.5
        .. versionchanged:: 0.3.6
            Encapsulated in class

        """
        # Equal inputs short-circuit before any encoding is done.
        if src == tar:
            return 0.0

        encode = self._rle.encode

        # Only the encoded lengths matter for the distance.
        len_src = len(encode(src))
        len_tar = len(encode(tar))
        len_src_tar = len(encode(src + tar))
        len_tar_src = len(encode(tar + src))

        # Both concatenation orders are tried and the shorter one is used.
        numerator = min(len_src_tar, len_tar_src) - min(len_src, len_tar)
        return numerator / max(len_src, len_tar)
85
86
87
if __name__ == '__main__':