Code Duplication    Length = 49-53 lines in 2 locations

abydos/distance/_ms_contingency.py 1 location

@@ 115-167 (lines=53) @@
112
            **kwargs
113
        )
114
115
    def corr(self, src, tar):
        """Return the mean squared contingency correlation of two strings.

        After tokenizing both inputs, the method reads four cardinalities
        from the instance (intersection ``a``, source-only ``b``,
        target-only ``c``, total complement ``d``) plus the source and
        target cardinalities, and evaluates::

            sqrt(2) * (a*d - b*c)
            / sqrt((a*d - b*c)**2 + src_card * tar_card * (b+d) * (c+d))

        Parameters
        ----------
        src : str
            Source string (or QGrams/Counter objects) for comparison
        tar : str
            Target string (or QGrams/Counter objects) for comparison

        Returns
        -------
        float
            Mean squared contingency correlation. Shortcuts: 1.0 when
            ``src == tar``, -1.0 when either input is empty (and they are
            not equal), and 0.0 when ``a*d - b*c`` is zero.

        Examples
        --------
        >>> cmp = MSContingency()
        >>> cmp.corr('cat', 'hat')
        0.6298568508557214
        >>> cmp.corr('Niall', 'Neil')
        0.4798371954796814
        >>> cmp.corr('aluminum', 'Catalan')
        0.15214891090821628
        >>> cmp.corr('ATCG', 'TAGC')
        -0.009076921903905553


        .. versionadded:: 0.4.0

        """
        # Trivial cases are answered before any tokenization is done.
        if src == tar:
            return 1.0
        if not src or not tar:
            return -1.0

        self._tokenize(src, tar)

        a = self._intersection_card()
        b = self._src_only_card()
        c = self._tar_only_card()
        d = self._total_complement_card()
        src_card = self._src_card()
        tar_card = self._tar_card()

        admbc = a * d - b * c
        if not admbc:
            # Zero numerator: return early so the denominator, which could
            # also be zero here, is never evaluated.
            return 0.0

        marginal_product = src_card * tar_card * (b + d) * (c + d)
        # admbc is non-zero here, so the radicand is strictly positive as
        # long as the cardinalities are non-negative.
        return 2 ** 0.5 * admbc / (admbc ** 2 + marginal_product) ** 0.5
168
169
    def sim(self, src, tar):
170
        """Return the normalized ms contingency similarity of two strings.

abydos/distance/_pearson_chi_squared.py 1 location

@@ 112-160 (lines=49) @@
109
            **kwargs
110
        )
111
112
    def sim_score(self, src, tar):
        """Return Pearson's Chi-Squared similarity of two strings.

        After tokenizing both inputs, the method reads four cardinalities
        from the instance (intersection ``a``, source-only ``b``,
        target-only ``c``, total complement ``d``), the unique population
        cardinality ``n``, and the source and target cardinalities, and
        evaluates::

            n * (a*d - b*c)**2 / (src_card * tar_card * (b+d) * (c+d))

        Parameters
        ----------
        src : str
            Source string (or QGrams/Counter objects) for comparison
        tar : str
            Target string (or QGrams/Counter objects) for comparison

        Returns
        -------
        float
            Pearson's Chi-Squared similarity. Shortcuts: ``float(n)`` when
            ``src == tar``, 0.0 when either input is empty (and they are
            not equal), and 0.0 when the numerator is zero.

        Examples
        --------
        >>> cmp = PearsonChiSquared()
        >>> cmp.sim_score('cat', 'hat')
        193.99489809335964
        >>> cmp.sim_score('Niall', 'Neil')
        101.99771068526542
        >>> cmp.sim_score('aluminum', 'Catalan')
        9.19249664336649
        >>> cmp.sim_score('ATCG', 'TAGC')
        0.032298410951138765


        .. versionadded:: 0.4.0

        """
        # Tokenization comes first because the equal-input shortcut below
        # returns the population cardinality, which is read after tokenizing.
        self._tokenize(src, tar)

        a = self._intersection_card()
        b = self._src_only_card()
        c = self._tar_only_card()
        d = self._total_complement_card()
        n = self._population_unique_card()
        src_card = self._src_card()
        tar_card = self._tar_card()

        if src == tar:
            return float(n)
        if not src or not tar:
            return 0.0

        num = n * (a * d - b * c) ** 2
        if not num:
            # Zero numerator: return early so the denominator, which could
            # also be zero here, is never evaluated.
            return 0.0
        return num / (src_card * tar_card * (b + d) * (c + d))
161
162
    def corr(self, src, tar):
163
        """Return Pearson's Chi-Squared correlation of two strings.