Code Duplication    Length = 33-33 lines in 2 locations

abydos/distance/compression.py 2 locations

@@ 263-295 (lines=33) @@
260
    return 1 - dist_ncd_zlib(src, tar)
261
262
263
def dist_ncd_bz2(src, tar):
    """Return the NCD between two strings using bz2 compression.

    Normalized compression distance (NCD) :cite:`Cilibrasi:2005`.

    Both strings are UTF-8 encoded and then compressed with the
    ``bz2_codec`` codec: each one alone and the two concatenated in both
    orders. Writing ``C(x)`` for the length of the compressed form of ``x``
    after its first 15 bytes have been dropped, the value returned is::

        (min(C(src + tar), C(tar + src)) - min(C(src), C(tar)))
        / max(C(src), C(tar))

    :param str src: source string for comparison
    :param str tar: target string for comparison
    :returns: compression distance
    :rtype: float

    >>> dist_ncd_bz2('cat', 'hat')
    0.08
    >>> dist_ncd_bz2('Niall', 'Neil')
    0.037037037037037035
    >>> dist_ncd_bz2('aluminum', 'Catalan')
    0.20689655172413793
    >>> dist_ncd_bz2('ATCG', 'TAGC')
    0.037037037037037035
    """
    # Equal inputs short-circuit to 0.0 without compressing anything.
    if src == tar:
        return 0.0

    src = src.encode('utf-8')
    tar = tar.encode('utf-8')

    # Measure each compressed payload exactly once, in the order
    # src, tar, src + tar, tar + src.
    # NOTE(review): the 15-byte prefix is presumably skipped so that the
    # fixed leading part of every bz2 stream does not dominate the lengths
    # of short inputs -- confirm the exact offset against the bz2 format.
    src_len, tar_len, fwd_len, rev_len = (
        len(encode(data, 'bz2_codec')[15:])
        for data in (src, tar, src + tar, tar + src)
    )

    # Taking the smaller of the two concatenation orders makes the result
    # independent of which argument is passed first.
    return (min(fwd_len, rev_len) - min(src_len, tar_len)) / max(
        src_len, tar_len
    )
296
297
298
def sim_ncd_bz2(src, tar):
@@ 206-238 (lines=33) @@
203
    return 1 - dist_ncd_bwtrle(src, tar)
204
205
206
def dist_ncd_zlib(src, tar):
    """Return the NCD between two strings using zlib compression.

    Normalized compression distance (NCD) :cite:`Cilibrasi:2005`.

    Both strings are UTF-8 encoded and then compressed with the
    ``zlib_codec`` codec: each one alone and the two concatenated in both
    orders. Writing ``C(x)`` for the length of the compressed form of ``x``
    after its first 2 bytes have been dropped, the value returned is::

        (min(C(src + tar), C(tar + src)) - min(C(src), C(tar)))
        / max(C(src), C(tar))

    :param str src: source string for comparison
    :param str tar: target string for comparison
    :returns: compression distance
    :rtype: float

    >>> dist_ncd_zlib('cat', 'hat')
    0.3333333333333333
    >>> dist_ncd_zlib('Niall', 'Neil')
    0.45454545454545453
    >>> dist_ncd_zlib('aluminum', 'Catalan')
    0.5714285714285714
    >>> dist_ncd_zlib('ATCG', 'TAGC')
    0.4
    """
    # Equal inputs short-circuit to 0.0 without compressing anything.
    if src == tar:
        return 0.0

    src = src.encode('utf-8')
    tar = tar.encode('utf-8')

    # Measure each compressed payload exactly once, in the order
    # src, tar, src + tar, tar + src.
    # The slice drops the first two bytes of each stream; in the zlib
    # container format (RFC 1950) those are the CMF/FLG header bytes.
    src_len, tar_len, fwd_len, rev_len = (
        len(encode(data, 'zlib_codec')[2:])
        for data in (src, tar, src + tar, tar + src)
    )

    # Taking the smaller of the two concatenation orders makes the result
    # independent of which argument is passed first.
    return (min(fwd_len, rev_len) - min(src_len, tar_len)) / max(
        src_len, tar_len
    )
239
240
241
def sim_ncd_zlib(src, tar):