Code Duplication    Length = 21-23 lines in 2 locations

tests/test_phonetic.py 2 locations

@@ 6381-6403 (lines=23) @@
6378
                                     nn_line[1])
6379
                    self.assertEqual(bmpm(nn_line[0]), nn_line[2])
6380
6381
    def test_bmpm_uscensus2000(self):
6382
        """Test abydos.phonetic.bmpm (US Census 2000 set).

        Reads corpora/uscensus2000.bm.csv under TESTDIR and, for each
        sampled row, compares bmpm's output for the name in column 0
        with columns 1-6: match_mode 'approx' then 'exact', each with
        name_mode 'gen', 'ash' and 'sep' in that order.

        Returns without asserting anything when ALLOW_RANDOM is falsy.
        """
6383
        if not ALLOW_RANDOM:
6384
            return
6385
        with open(TESTDIR + '/corpora/uscensus2000.bm.csv') as uscensus_ts:
6386
            next(uscensus_ts)  # drop the first line (presumably a header)
6387
            for cen_line in uscensus_ts:
6388
                cen_line = cen_line.strip().split(',')
6389
                # This test set is very large (~150000 entries)
6390
                # so let's just randomly select about 20 for testing
                # (one_in(7500) is presumably a 1-in-7500 random draw --
                # confirm; rows whose first field is exactly '#' are
                # never tested)
6391
                if cen_line[0] != '#' and one_in(7500):
6392
                    self.assertEqual(bmpm(cen_line[0], match_mode='approx',
6393
                                          name_mode='gen'), cen_line[1])
6394
                    self.assertEqual(bmpm(cen_line[0], match_mode='approx',
6395
                                          name_mode='ash'), cen_line[2])
6396
                    self.assertEqual(bmpm(cen_line[0], match_mode='approx',
6397
                                          name_mode='sep'), cen_line[3])
6398
                    self.assertEqual(bmpm(cen_line[0], match_mode='exact',
6399
                                          name_mode='gen'), cen_line[4])
6400
                    self.assertEqual(bmpm(cen_line[0], match_mode='exact',
6401
                                          name_mode='ash'), cen_line[5])
6402
                    self.assertEqual(bmpm(cen_line[0], match_mode='exact',
6403
                                          name_mode='sep'), cen_line[6])
6404
6405
    def test_bmpm_uscensus2000_cc(self):
6406
        """Test abydos.phonetic.bmpm (US Census 2000 set, corner cases)."""
@@ 6405-6425 (lines=21) @@
6402
                    self.assertEqual(bmpm(cen_line[0], match_mode='exact',
6403
                                          name_mode='sep'), cen_line[6])
6404
6405
    def test_bmpm_uscensus2000_cc(self):
6406
        """Test abydos.phonetic.bmpm (US Census 2000 set, corner cases).

        Reads corpora/uscensus2000.bm.cc.csv under TESTDIR and, for each
        sampled row, compares bmpm's output for the name in column 0
        with columns 1-6: match_mode 'approx' then 'exact', each with
        name_mode 'gen', 'ash' and 'sep' in that order.

        NOTE(review): rows are filtered through one_in(10), yet this
        test has no ALLOW_RANDOM guard -- confirm that is intentional.
        """
6407
        with open(TESTDIR + '/corpora/uscensus2000.bm.cc.csv') as uscensus_ts:
6408
            next(uscensus_ts)  # drop the first line (presumably a header)
6409
            for cen_line in uscensus_ts:
6410
                cen_line = cen_line.strip().split(',')
6411
                # This test set is very large (~150000 entries)
6412
                # so let's just randomly select about 20 for testing
                # NOTE(review): the two lines above are word-for-word the
                # comment used in test_bmpm_uscensus2000, but the sampling
                # here is one_in(10), not one_in(7500) -- confirm the size
                # of the corner-case file and the intended sample count.
                # Rows whose first field is exactly '#' are never tested.
6413
                if cen_line[0] != '#' and one_in(10):
6414
                    self.assertEqual(bmpm(cen_line[0], match_mode='approx',
6415
                                          name_mode='gen'), cen_line[1])
6416
                    self.assertEqual(bmpm(cen_line[0], match_mode='approx',
6417
                                          name_mode='ash'), cen_line[2])
6418
                    self.assertEqual(bmpm(cen_line[0], match_mode='approx',
6419
                                          name_mode='sep'), cen_line[3])
6420
                    self.assertEqual(bmpm(cen_line[0], match_mode='exact',
6421
                                          name_mode='gen'), cen_line[4])
6422
                    self.assertEqual(bmpm(cen_line[0], match_mode='exact',
6423
                                          name_mode='ash'), cen_line[5])
6424
                    self.assertEqual(bmpm(cen_line[0], match_mode='exact',
6425
                                          name_mode='sep'), cen_line[6])
6426
6427
    def test_bm_phonetic_number(self):
6428
        """Test abydos.bm._bm_phonetic_number."""