| @@ 219-294 (lines=76) @@ | ||
| 216 | ||
| 217 | return d_mat | |
| 218 | ||
def alignment(self, src, tar):
    """Return the Levenshtein alignment of two strings.

    The alignment matrix produced by ``self._alignment_matrix`` is walked
    backwards from its bottom-right cell to its origin; every step emits
    one aligned column.

    Parameters
    ----------
    src : str
        Source string for comparison
    tar : str
        Target string for comparison

    Returns
    -------
    tuple
        A 3-tuple: the value in the bottom-right cell of the alignment
        matrix (the distance), the aligned source string, and the aligned
        target string. Gaps are written as ``'-'``.

    Examples
    --------
    >>> cmp = DiscountedLevenshtein()
    >>> cmp.alignment('cat', 'hat')
    (1.0, 'cat', 'hat')
    >>> cmp.alignment('Niall', 'Neil')
    (2.526064024369237, 'N-iall', 'Neil--')
    >>> cmp.alignment('aluminum', 'Catalan')
    (5.053867269967515, '-aluminum', 'Catalan--')
    >>> cmp.alignment('ATCG', 'TAGC')
    (2.594032108779918, 'ATCG-', '-TAGC')

    >>> cmp = DiscountedLevenshtein(mode='osa')
    >>> cmp.alignment('ATCG', 'TAGC')
    (1.7482385137517997, 'ATCG', 'TAGC')
    >>> cmp.alignment('ACTG', 'TAGC')
    (3.342270622531718, '-ACTG', 'TAGC-')


    .. versionadded:: 0.4.1

    """
    matrix = self._alignment_matrix(src, tar)

    row, col = len(src), len(tar)
    score = matrix[row, col]

    # Aligned columns, gathered back-to-front as (src_char, tar_char) pairs.
    columns = []

    while row > 0 and col > 0:
        tar_only = matrix[row, col - 1]
        src_only = matrix[row - 1, col]
        both = matrix[row - 1, col - 1]

        if both <= min(tar_only, src_only):
            # Diagonal step: consume one character from each string.
            row -= 1
            col -= 1
            columns.append((src[row], tar[col]))
        elif tar_only <= src_only:
            # Consume a target character against a gap in the source.
            col -= 1
            columns.append(('-', tar[col]))
        else:
            # Consume a source character against a gap in the target.
            row -= 1
            columns.append((src[row], '-'))

    # At most one of the two prefixes is still unconsumed here; pair what
    # is left of it with gaps, still moving from the end towards the start.
    columns.extend(('-', char) for char in reversed(tar[:col]))
    columns.extend((char, '-') for char in reversed(src[:row]))

    columns.reverse()
    return (
        score,
        ''.join(pair[0] for pair in columns),
        ''.join(pair[1] for pair in columns),
    )
| 295 | ||
| 296 | def dist_abs(self, src, tar): | |
| 297 | """Return the Levenshtein distance between two strings. | |
| @@ 188-263 (lines=76) @@ | ||
| 185 | ||
| 186 | return d_mat | |
| 187 | ||
def alignment(self, src, tar):
    """Return the Levenshtein alignment of two strings.

    Starting from the bottom-right cell of the matrix returned by
    ``self._alignment_matrix``, the method steps back towards the origin
    and records one aligned column per step.

    Parameters
    ----------
    src : str
        Source string for comparison
    tar : str
        Target string for comparison

    Returns
    -------
    tuple
        A tuple containing the Levenshtein distance (the bottom-right
        matrix cell) and the two strings, aligned, with ``'-'`` marking
        gaps.

    Examples
    --------
    >>> cmp = Levenshtein()
    >>> cmp.alignment('cat', 'hat')
    (1.0, 'cat', 'hat')
    >>> cmp.alignment('Niall', 'Neil')
    (3.0, 'Niall', 'Neil-')
    >>> cmp.alignment('aluminum', 'Catalan')
    (7.0, '-aluminum', 'Catalan--')
    >>> cmp.alignment('ATCG', 'TAGC')
    (3.0, 'ATCG-', '-TAGC')

    >>> cmp = Levenshtein(mode='osa')
    >>> cmp.alignment('ATCG', 'TAGC')
    (2.0, 'ATCG', 'TAGC')
    >>> cmp.alignment('ACTG', 'TAGC')
    (4.0, 'ACTG', 'TAGC')


    .. versionadded:: 0.4.1

    """
    table = self._alignment_matrix(src, tar)

    i = len(src)
    j = len(tar)
    total = table[i, j]

    # Both rows are built right-to-left and flipped once at the end.
    src_row = []
    tar_row = []

    while i and j:
        back_tar = table[i, j - 1]
        back_src = table[i - 1, j]
        back_both = table[i - 1, j - 1]

        if back_both <= min(back_tar, back_src):
            # The diagonal neighbour is no worse than either side: pair
            # the two characters.
            i, j = i - 1, j - 1
            src_row.append(src[i])
            tar_row.append(tar[j])
            continue

        if back_tar <= back_src:
            # Step back along the target only; the source gets a gap.
            j -= 1
            src_row.append('-')
            tar_row.append(tar[j])
            continue

        # Step back along the source only; the target gets a gap.
        i -= 1
        src_row.append(src[i])
        tar_row.append('-')

    # Whatever remains of the target is aligned against gaps.
    while j:
        j -= 1
        src_row.append('-')
        tar_row.append(tar[j])

    # Whatever remains of the source is aligned against gaps.
    while i:
        i -= 1
        src_row.append(src[i])
        tar_row.append('-')

    src_row.reverse()
    tar_row.reverse()
    return total, ''.join(src_row), ''.join(tar_row)
| 264 | ||
| 265 | def dist_abs(self, src, tar): | |
| 266 | """Return the Levenshtein distance between two strings. | |