Code Duplication    Length = 21-23 lines in 2 locations

tests/test_phonetic.py 2 locations

@@ 4882-4904 (lines=23) @@
4879
                                     nn_line[1])
4880
                    self.assertEqual(bmpm(nn_line[0]), nn_line[2])
4881
4882
    def test_bmpm_uscensus2000(self):
        """Test abydos.phonetic.bmpm (US Census 2000 set).

        Returns immediately, without checking anything, when ALLOW_RANDOM
        is falsy. Otherwise a random sample of rows from
        corpora/uscensus2000.bm.csv is checked: field 0 is passed to bmpm
        and fields 1-6 hold the expected results for each
        (match_mode, name_mode) combination listed in mode_columns.
        """
        if not ALLOW_RANDOM:
            return

        # (match_mode, name_mode) used for CSV fields 1 through 6, in order.
        mode_columns = (('approx', 'gen'), ('approx', 'ash'),
                        ('approx', 'sep'), ('exact', 'gen'),
                        ('exact', 'ash'), ('exact', 'sep'))

        with open(TESTDIR + '/corpora/uscensus2000.bm.csv') as uscensus_ts:
            # Discard the first line (presumably a column header -- confirm).
            next(uscensus_ts)
            for raw_line in uscensus_ts:
                fields = raw_line.strip().split(',')
                # This test set is very large (~150000 entries)
                # so let's just randomly select about 20 for testing.
                # Rows whose first field is exactly '#' are never sampled,
                # and one_in is not even consulted for them.
                if fields[0] == '#' or not one_in(7500):
                    continue
                for column, (match_mode, name_mode) in enumerate(mode_columns,
                                                                 1):
                    self.assertEqual(bmpm(fields[0], match_mode=match_mode,
                                          name_mode=name_mode),
                                     fields[column])
4905
4906
    def test_bmpm_uscensus2000_cc(self):
4907
        """Test abydos.phonetic.bmpm (US Census 2000 set, corner cases)."""
@@ 4906-4926 (lines=21) @@
4903
                    self.assertEqual(bmpm(cen_line[0], match_mode='exact',
4904
                                          name_mode='sep'), cen_line[6])
4905
4906
    def test_bmpm_uscensus2000_cc(self):
        """Test abydos.phonetic.bmpm (US Census 2000 set, corner cases).

        A random sample of rows from corpora/uscensus2000.bm.cc.csv is
        checked: field 0 is passed to bmpm and fields 1-6 hold the expected
        results for each (match_mode, name_mode) combination listed in
        mode_columns.
        """
        # (match_mode, name_mode) used for CSV fields 1 through 6, in order.
        mode_columns = (('approx', 'gen'), ('approx', 'ash'),
                        ('approx', 'sep'), ('exact', 'gen'),
                        ('exact', 'ash'), ('exact', 'sep'))

        with open(TESTDIR + '/corpora/uscensus2000.bm.cc.csv') as uscensus_ts:
            # Discard the first line (presumably a column header -- confirm).
            next(uscensus_ts)
            for raw_line in uscensus_ts:
                fields = raw_line.strip().split(',')
                # Only rows for which one_in(10) is true are checked.
                # NOTE(review): the comment previously here described a
                # ~150000-entry set sampled down to about 20 rows, which
                # looks copied from test_bmpm_uscensus2000 -- confirm the
                # size of this corner-case file and the intended sample.
                # Rows whose first field is exactly '#' are never sampled,
                # and one_in is not even consulted for them.
                if fields[0] == '#' or not one_in(10):
                    continue
                for column, (match_mode, name_mode) in enumerate(mode_columns,
                                                                 1):
                    self.assertEqual(bmpm(fields[0], match_mode=match_mode,
                                          name_mode=name_mode),
                                     fields[column])
4927
4928
    def test_bm_phonetic_number(self):
4929
        """Test abydos.bm._bm_phonetic_number."""