Code Duplication    Length = 21-23 lines in 2 locations

tests/test_phonetic.py 2 locations

@@ 5895-5917 (lines=23) @@
5892
                                     nn_line[1])
5893
                    self.assertEqual(bmpm(nn_line[0]), nn_line[2])
5894
5895
    def test_bmpm_uscensus2000(self):
5896
        """Test abydos.phonetic.bmpm (US Census 2000 set)."""
5897
        if not ALLOW_RANDOM:
5898
            return
5899
        with open(TESTDIR + '/corpora/uscensus2000.bm.csv') as uscensus_ts:
5900
            next(uscensus_ts)
5901
            for cen_line in uscensus_ts:
5902
                cen_line = cen_line.strip().split(',')
5903
                # This test set is very large (~150000 entries)
5904
                # so let's just randomly select about 20 for testing
5905
                if cen_line[0] != '#' and one_in(7500):
5906
                    self.assertEqual(bmpm(cen_line[0], match_mode='approx',
5907
                                          name_mode='gen'), cen_line[1])
5908
                    self.assertEqual(bmpm(cen_line[0], match_mode='approx',
5909
                                          name_mode='ash'), cen_line[2])
5910
                    self.assertEqual(bmpm(cen_line[0], match_mode='approx',
5911
                                          name_mode='sep'), cen_line[3])
5912
                    self.assertEqual(bmpm(cen_line[0], match_mode='exact',
5913
                                          name_mode='gen'), cen_line[4])
5914
                    self.assertEqual(bmpm(cen_line[0], match_mode='exact',
5915
                                          name_mode='ash'), cen_line[5])
5916
                    self.assertEqual(bmpm(cen_line[0], match_mode='exact',
5917
                                          name_mode='sep'), cen_line[6])
5918
5919
    def test_bmpm_uscensus2000_cc(self):
5920
        """Test abydos.phonetic.bmpm (US Census 2000 set, corner cases)."""
@@ 5919-5939 (lines=21) @@
5916
                    self.assertEqual(bmpm(cen_line[0], match_mode='exact',
5917
                                          name_mode='sep'), cen_line[6])
5918
5919
    def test_bmpm_uscensus2000_cc(self):
5920
        """Test abydos.phonetic.bmpm (US Census 2000 set, corner cases)."""
5921
        with open(TESTDIR + '/corpora/uscensus2000.bm.cc.csv') as uscensus_ts:
5922
            next(uscensus_ts)
5923
            for cen_line in uscensus_ts:
5924
                cen_line = cen_line.strip().split(',')
5925
                # This test set is very large (~150000 entries)
5926
                # so let's just randomly select about 20 for testing
5927
                if cen_line[0] != '#' and one_in(10):
5928
                    self.assertEqual(bmpm(cen_line[0], match_mode='approx',
5929
                                          name_mode='gen'), cen_line[1])
5930
                    self.assertEqual(bmpm(cen_line[0], match_mode='approx',
5931
                                          name_mode='ash'), cen_line[2])
5932
                    self.assertEqual(bmpm(cen_line[0], match_mode='approx',
5933
                                          name_mode='sep'), cen_line[3])
5934
                    self.assertEqual(bmpm(cen_line[0], match_mode='exact',
5935
                                          name_mode='gen'), cen_line[4])
5936
                    self.assertEqual(bmpm(cen_line[0], match_mode='exact',
5937
                                          name_mode='ash'), cen_line[5])
5938
                    self.assertEqual(bmpm(cen_line[0], match_mode='exact',
5939
                                          name_mode='sep'), cen_line[6])
5940
5941
    def test_bm_phonetic_number(self):
5942
        """Test abydos.bm._bm_phonetic_number."""