Code Duplication    Length = 21-23 lines in 2 locations

tests/test_phonetic.py 2 locations

@@ 5915-5937 (lines=23) @@
5912
                                     nn_line[1])
5913
                    self.assertEqual(bmpm(nn_line[0]), nn_line[2])
5914
5915
    def test_bmpm_uscensus2000(self):
5916
        """Test abydos.phonetic.bmpm (US Census 2000 set)."""
5917
        if not ALLOW_RANDOM:
5918
            return
5919
        with open(TESTDIR + '/corpora/uscensus2000.bm.csv') as uscensus_ts:
5920
            next(uscensus_ts)
5921
            for cen_line in uscensus_ts:
5922
                cen_line = cen_line.strip().split(',')
5923
                # This test set is very large (~150000 entries)
5924
                # so let's just randomly select about 20 for testing
5925
                if cen_line[0] != '#' and one_in(7500):
5926
                    self.assertEqual(bmpm(cen_line[0], match_mode='approx',
5927
                                          name_mode='gen'), cen_line[1])
5928
                    self.assertEqual(bmpm(cen_line[0], match_mode='approx',
5929
                                          name_mode='ash'), cen_line[2])
5930
                    self.assertEqual(bmpm(cen_line[0], match_mode='approx',
5931
                                          name_mode='sep'), cen_line[3])
5932
                    self.assertEqual(bmpm(cen_line[0], match_mode='exact',
5933
                                          name_mode='gen'), cen_line[4])
5934
                    self.assertEqual(bmpm(cen_line[0], match_mode='exact',
5935
                                          name_mode='ash'), cen_line[5])
5936
                    self.assertEqual(bmpm(cen_line[0], match_mode='exact',
5937
                                          name_mode='sep'), cen_line[6])
5938
5939
    def test_bmpm_uscensus2000_cc(self):
5940
        """Test abydos.phonetic.bmpm (US Census 2000 set, corner cases)."""
@@ 5939-5959 (lines=21) @@
5936
                    self.assertEqual(bmpm(cen_line[0], match_mode='exact',
5937
                                          name_mode='sep'), cen_line[6])
5938
5939
    def test_bmpm_uscensus2000_cc(self):
5940
        """Test abydos.phonetic.bmpm (US Census 2000 set, corner cases)."""
5941
        with open(TESTDIR + '/corpora/uscensus2000.bm.cc.csv') as uscensus_ts:
5942
            next(uscensus_ts)
5943
            for cen_line in uscensus_ts:
5944
                cen_line = cen_line.strip().split(',')
5945
                # Randomly sample about one data line in ten (one_in(10)); the
5946
                # "~150000 entries, about 20" figures fit one_in(7500), not this
5947
                if cen_line[0] != '#' and one_in(10):
5948
                    self.assertEqual(bmpm(cen_line[0], match_mode='approx',
5949
                                          name_mode='gen'), cen_line[1])
5950
                    self.assertEqual(bmpm(cen_line[0], match_mode='approx',
5951
                                          name_mode='ash'), cen_line[2])
5952
                    self.assertEqual(bmpm(cen_line[0], match_mode='approx',
5953
                                          name_mode='sep'), cen_line[3])
5954
                    self.assertEqual(bmpm(cen_line[0], match_mode='exact',
5955
                                          name_mode='gen'), cen_line[4])
5956
                    self.assertEqual(bmpm(cen_line[0], match_mode='exact',
5957
                                          name_mode='ash'), cen_line[5])
5958
                    self.assertEqual(bmpm(cen_line[0], match_mode='exact',
5959
                                          name_mode='sep'), cen_line[6])
5960
5961
    def test_bm_phonetic_number(self):
5962
        """Test abydos.bm._bm_phonetic_number."""