| @@ 219-294 (lines=76) @@ | ||
| 216 | ||
| 217 | return d_mat |
|
| 218 | ||
def alignment(self, src, tar):
    """Return the Levenshtein alignment of two strings.

    The alignment is recovered by walking the matrix produced by
    ``self._alignment_matrix(src, tar)`` from its last cell back towards
    its first cell, always stepping to the smallest neighbouring cell.
    Ties favour the diagonal step, then a gap in the source, then a gap
    in the target. Gaps are written as ``'-'``.

    Parameters
    ----------
    src : str
        Source string for comparison
    tar : str
        Target string for comparison

    Returns
    -------
    tuple
        A tuple of three items: the distance (the matrix cell indexed by
        the two string lengths), the aligned source string, and the
        aligned target string.

    Examples
    --------
    >>> cmp = DiscountedLevenshtein()
    >>> cmp.alignment('cat', 'hat')
    (1.0, 'cat', 'hat')
    >>> cmp.alignment('Niall', 'Neil')
    (2.526064024369237, 'N-iall', 'Neil--')
    >>> cmp.alignment('aluminum', 'Catalan')
    (5.053867269967515, '-aluminum', 'Catalan--')
    >>> cmp.alignment('ATCG', 'TAGC')
    (2.594032108779918, 'ATCG-', '-TAGC')

    >>> cmp = DiscountedLevenshtein(mode='osa')
    >>> cmp.alignment('ATCG', 'TAGC')
    (1.7482385137517997, 'ATCG', 'TAGC')
    >>> cmp.alignment('ACTG', 'TAGC')
    (3.342270622531718, '-ACTG', 'TAGC-')


    .. versionadded:: 0.4.1

    """
    matrix = self._alignment_matrix(src, tar)

    i, j = len(src), len(tar)
    distance = matrix[i, j]

    # (source char, target char) pairs, gathered from the end of the
    # strings back towards their start.
    columns = []

    while i and j:
        gap_in_src = matrix[i, j - 1]
        gap_in_tar = matrix[i - 1, j]
        paired = matrix[i - 1, j - 1]

        if paired <= min(gap_in_src, gap_in_tar):
            # Consume one character from each string.
            i -= 1
            j -= 1
            columns.append((src[i], tar[j]))
            continue

        if gap_in_src <= gap_in_tar:
            j -= 1
            columns.append(('-', tar[j]))
        else:
            i -= 1
            columns.append((src[i], '-'))

    # The loop above stops once either index hits zero, so at most one of
    # these two loops has anything left to pad against gaps.
    for k in reversed(range(j)):
        columns.append(('-', tar[k]))
    for k in reversed(range(i)):
        columns.append((src[k], '-'))

    columns.reverse()
    return (
        distance,
        ''.join(s_char for s_char, _ in columns),
        ''.join(t_char for _, t_char in columns),
    )
|
| 295 | ||
| 296 | def dist_abs(self, src, tar): |
|
| 297 | """Return the Levenshtein distance between two strings. |
|
| @@ 188-263 (lines=76) @@ | ||
| 185 | ||
| 186 | return d_mat |
|
| 187 | ||
def alignment(self, src, tar):
    """Return the Levenshtein alignment of two strings.

    The alignment is recovered by walking the matrix produced by
    ``self._alignment_matrix(src, tar)`` from its last cell back towards
    its first cell, always stepping to the smallest neighbouring cell.
    Ties favour the diagonal step, then a gap in the source, then a gap
    in the target. Gaps are written as ``'-'``.

    Parameters
    ----------
    src : str
        Source string for comparison
    tar : str
        Target string for comparison

    Returns
    -------
    tuple
        A tuple of three items: the distance (the matrix cell indexed by
        the two string lengths), the aligned source string, and the
        aligned target string.

    Examples
    --------
    >>> cmp = Levenshtein()
    >>> cmp.alignment('cat', 'hat')
    (1.0, 'cat', 'hat')
    >>> cmp.alignment('Niall', 'Neil')
    (3.0, 'Niall', 'Neil-')
    >>> cmp.alignment('aluminum', 'Catalan')
    (7.0, '-aluminum', 'Catalan--')
    >>> cmp.alignment('ATCG', 'TAGC')
    (3.0, 'ATCG-', '-TAGC')

    >>> cmp = Levenshtein(mode='osa')
    >>> cmp.alignment('ATCG', 'TAGC')
    (2.0, 'ATCG', 'TAGC')
    >>> cmp.alignment('ACTG', 'TAGC')
    (4.0, 'ACTG', 'TAGC')


    .. versionadded:: 0.4.1

    """
    matrix = self._alignment_matrix(src, tar)

    i, j = len(src), len(tar)
    distance = matrix[i, j]

    # (source char, target char) pairs, gathered from the end of the
    # strings back towards their start.
    columns = []

    while i and j:
        gap_in_src = matrix[i, j - 1]
        gap_in_tar = matrix[i - 1, j]
        paired = matrix[i - 1, j - 1]

        if paired <= min(gap_in_src, gap_in_tar):
            # Consume one character from each string.
            i -= 1
            j -= 1
            columns.append((src[i], tar[j]))
            continue

        if gap_in_src <= gap_in_tar:
            j -= 1
            columns.append(('-', tar[j]))
        else:
            i -= 1
            columns.append((src[i], '-'))

    # The loop above stops once either index hits zero, so at most one of
    # these two loops has anything left to pad against gaps.
    for k in reversed(range(j)):
        columns.append(('-', tar[k]))
    for k in reversed(range(i)):
        columns.append((src[k], '-'))

    columns.reverse()
    return (
        distance,
        ''.join(s_char for s_char, _ in columns),
        ''.join(t_char for _, t_char in columns),
    )
|
| 264 | ||
| 265 | def dist_abs(self, src, tar): |
|
| 266 | """Return the Levenshtein distance between two strings. |
|