Code Duplication    Length = 49-53 lines in 2 locations

abydos/distance/_ms_contingency.py 1 location

@@ 111-163 (lines=53) @@
108
            **kwargs
109
        )
110
111
    def corr(self, src: str, tar: str) -> float:
        """Return the normalized mean squared contingency correlation.

        The two inputs are tokenized and summarized as a 2x2 contingency
        table with cells ``a`` (shared), ``b`` (source only), ``c`` (target
        only) and ``d`` (in neither). With ``ab`` and ``ac`` the source and
        target cardinalities, the result is::

            sqrt(2) * (ad - bc)
            / sqrt((ad - bc)**2 + ab * ac * (b + d) * (c + d))

        Parameters
        ----------
        src : str
            Source string (or QGrams/Counter objects) for comparison
        tar : str
            Target string (or QGrams/Counter objects) for comparison

        Returns
        -------
        float
            Mean squared contingency correlation. Special cases:

            - ``1.0`` when ``src == tar`` (including two empty inputs)
            - ``-1.0`` when the inputs differ and either one is empty
            - ``0.0`` when ``a * d == b * c``

        Examples
        --------
        >>> cmp = MSContingency()
        >>> cmp.corr('cat', 'hat')
        0.6298568508557214
        >>> cmp.corr('Niall', 'Neil')
        0.4798371954796814
        >>> cmp.corr('aluminum', 'Catalan')
        0.15214891090821628
        >>> cmp.corr('ATCG', 'TAGC')
        -0.009076921903905553


        .. versionadded:: 0.4.0

        """
        # Trivial cases are settled before any tokenization takes place.
        if src == tar:
            return 1.0
        if not src or not tar:
            return -1.0

        self._tokenize(src, tar)

        a = self._intersection_card()
        b = self._src_only_card()
        c = self._tar_only_card()
        d = self._total_complement_card()
        ab = self._src_card()
        ac = self._tar_card()
        admbc = a * d - b * c

        # A zero determinant yields 0.0 directly; this also keeps the
        # division below away from a potentially zero denominator.
        if not admbc:
            return 0.0

        denominator = admbc ** 2 + ab * ac * (b + d) * (c + d)
        return 2 ** 0.5 * admbc / denominator ** 0.5
164
165
    def sim(self, src: str, tar: str) -> float:
166
        """Return the normalized ms contingency similarity of two strings.

abydos/distance/_pearson_chi_squared.py 1 location

@@ 107-155 (lines=49) @@
104
            **kwargs
105
        )
106
107
    def sim_score(self, src: str, tar: str) -> float:
        """Return Pearson's Chi-Squared similarity of two strings.

        The two inputs are tokenized and summarized as a 2x2 contingency
        table with cells ``a`` (shared), ``b`` (source only), ``c`` (target
        only) and ``d`` (in neither). With ``n`` the population cardinality
        and ``ab`` / ``ac`` the source / target cardinalities, the result
        is::

            n * (ad - bc)**2 / (ab * ac * (b + d) * (c + d))

        Parameters
        ----------
        src : str
            Source string (or QGrams/Counter objects) for comparison
        tar : str
            Target string (or QGrams/Counter objects) for comparison

        Returns
        -------
        float
            Pearson's Chi-Squared similarity. Special cases:

            - ``float(n)`` when ``src == tar``
            - ``0.0`` when the inputs differ and either one is empty
            - ``0.0`` when ``n * (a * d - b * c) ** 2`` is zero

        Examples
        --------
        >>> cmp = PearsonChiSquared()
        >>> cmp.sim_score('cat', 'hat')
        193.99489809335964
        >>> cmp.sim_score('Niall', 'Neil')
        101.99771068526542
        >>> cmp.sim_score('aluminum', 'Catalan')
        9.19249664336649
        >>> cmp.sim_score('ATCG', 'TAGC')
        0.032298410951138765


        .. versionadded:: 0.4.0

        """
        # Tokenization always runs first: even the identical-input case
        # needs the population cardinality derived from it.
        self._tokenize(src, tar)

        # Settle the trivial cases before building the full contingency
        # table; only ``n`` is needed for identical inputs.
        # NOTE(review): assumes the cardinality helpers are side-effect-free
        # reads of the tokenized state -- confirm against the base class.
        if src == tar:
            return float(self._population_unique_card())
        if not src or not tar:
            return 0.0

        a = self._intersection_card()
        b = self._src_only_card()
        c = self._tar_only_card()
        d = self._total_complement_card()
        n = self._population_unique_card()
        ab = self._src_card()
        ac = self._tar_card()

        num = n * (a * d - b * c) ** 2
        # A zero numerator yields 0.0 directly; this also keeps the
        # division below away from a potentially zero denominator.
        if not num:
            return 0.0
        return num / (ab * ac * (b + d) * (c + d))
156
157
    def corr(self, src: str, tar: str) -> float:
158
        """Return Pearson's Chi-Squared correlation of two strings.