Code Duplication    Length = 76-76 lines in 2 locations

abydos/distance/_discounted_levenshtein.py 1 location

@@ 219-294 (lines=76) @@
216
217
        return d_mat
218
219
    def alignment(self, src, tar):
        """Align two strings by tracing back through the alignment matrix.

        The matrix is obtained from ``self._alignment_matrix(src, tar)`` and
        is walked from its bottom-right cell back towards the origin. At
        each step the cheapest of the three neighbouring cells decides
        whether a character pair is emitted or a gap (``'-'``) is inserted
        into one of the two strings. Ties prefer the diagonal move, then a
        gap in the source.

        Parameters
        ----------
        src : str
            Source string for comparison
        tar : str
            Target string for comparison

        Returns
        -------
        tuple
            A 3-tuple of the distance (the bottom-right matrix entry), the
            aligned source string, and the aligned target string.

        Examples
        --------
        >>> cmp = DiscountedLevenshtein()
        >>> cmp.alignment('cat', 'hat')
        (1.0, 'cat', 'hat')
        >>> cmp.alignment('Niall', 'Neil')
        (2.526064024369237, 'N-iall', 'Neil--')
        >>> cmp.alignment('aluminum', 'Catalan')
        (5.053867269967515, '-aluminum', 'Catalan--')
        >>> cmp.alignment('ATCG', 'TAGC')
        (2.594032108779918, 'ATCG-', '-TAGC')

        >>> cmp = DiscountedLevenshtein(mode='osa')
        >>> cmp.alignment('ATCG', 'TAGC')
        (1.7482385137517997, 'ATCG', 'TAGC')
        >>> cmp.alignment('ACTG', 'TAGC')
        (3.342270622531718, '-ACTG', 'TAGC-')


        .. versionadded:: 0.4.1

        """
        matrix = self._alignment_matrix(src, tar)

        row, col = len(src), len(tar)
        distance = matrix[row, col]

        # Both aligned strings are collected back-to-front during the
        # traceback and reversed once at the end.
        src_rev = []
        tar_rev = []

        while row > 0 and col > 0:
            skip_tar = matrix[row, col - 1]  # consume a target char only
            skip_src = matrix[row - 1, col]  # consume a source char only
            both = matrix[row - 1, col - 1]  # consume one char of each

            if both <= min(skip_tar, skip_src):
                row -= 1
                col -= 1
                src_ch, tar_ch = src[row], tar[col]
            elif skip_tar <= skip_src:
                col -= 1
                src_ch, tar_ch = '-', tar[col]
            else:
                row -= 1
                src_ch, tar_ch = src[row], '-'

            src_rev.append(src_ch)
            tar_rev.append(tar_ch)

        # At most one of the strings still has an unconsumed prefix; pad
        # the other one with gaps so both outputs have equal length.
        for idx in range(col - 1, -1, -1):
            tar_rev.append(tar[idx])
            src_rev.append('-')
        for idx in range(row - 1, -1, -1):
            src_rev.append(src[idx])
            tar_rev.append('-')

        return distance, ''.join(reversed(src_rev)), ''.join(reversed(tar_rev))
295
296
    def dist_abs(self, src, tar):
297
        """Return the Levenshtein distance between two strings.

abydos/distance/_levenshtein.py 1 location

@@ 188-263 (lines=76) @@
185
186
        return d_mat
187
188
    def alignment(self, src, tar):
        """Align two strings by tracing back through the alignment matrix.

        The matrix is obtained from ``self._alignment_matrix(src, tar)`` and
        is walked from its bottom-right cell back towards the origin. At
        each step the cheapest of the three neighbouring cells decides
        whether a character pair is emitted or a gap (``'-'``) is inserted
        into one of the two strings. Ties prefer the diagonal move, then a
        gap in the source.

        Parameters
        ----------
        src : str
            Source string for comparison
        tar : str
            Target string for comparison

        Returns
        -------
        tuple
            A 3-tuple of the distance (the bottom-right matrix entry), the
            aligned source string, and the aligned target string.

        Examples
        --------
        >>> cmp = Levenshtein()
        >>> cmp.alignment('cat', 'hat')
        (1.0, 'cat', 'hat')
        >>> cmp.alignment('Niall', 'Neil')
        (3.0, 'Niall', 'Neil-')
        >>> cmp.alignment('aluminum', 'Catalan')
        (7.0, '-aluminum', 'Catalan--')
        >>> cmp.alignment('ATCG', 'TAGC')
        (3.0, 'ATCG-', '-TAGC')

        >>> cmp = Levenshtein(mode='osa')
        >>> cmp.alignment('ATCG', 'TAGC')
        (2.0, 'ATCG', 'TAGC')
        >>> cmp.alignment('ACTG', 'TAGC')
        (4.0, 'ACTG', 'TAGC')


        .. versionadded:: 0.4.1

        """
        matrix = self._alignment_matrix(src, tar)

        row, col = len(src), len(tar)
        distance = matrix[row, col]

        # Both aligned strings are collected back-to-front during the
        # traceback and reversed once at the end.
        src_rev = []
        tar_rev = []

        while row > 0 and col > 0:
            skip_tar = matrix[row, col - 1]  # consume a target char only
            skip_src = matrix[row - 1, col]  # consume a source char only
            both = matrix[row - 1, col - 1]  # consume one char of each

            if both <= min(skip_tar, skip_src):
                row -= 1
                col -= 1
                src_ch, tar_ch = src[row], tar[col]
            elif skip_tar <= skip_src:
                col -= 1
                src_ch, tar_ch = '-', tar[col]
            else:
                row -= 1
                src_ch, tar_ch = src[row], '-'

            src_rev.append(src_ch)
            tar_rev.append(tar_ch)

        # At most one of the strings still has an unconsumed prefix; pad
        # the other one with gaps so both outputs have equal length.
        for idx in range(col - 1, -1, -1):
            tar_rev.append(tar[idx])
            src_rev.append('-')
        for idx in range(row - 1, -1, -1):
            src_rev.append(src[idx])
            tar_rev.append('-')

        return distance, ''.join(reversed(src_rev)), ''.join(reversed(tar_rev))
264
265
    def dist_abs(self, src, tar):
266
        """Return the Levenshtein distance between two strings.