Code Duplication    Length = 21-23 lines in 2 locations

tests/test_phonetic.py 2 locations

@@ 5033-5055 (lines=23) @@
5030
                                     nn_line[1])
5031
                    self.assertEqual(bmpm(nn_line[0]), nn_line[2])
5032
5033
    def test_bmpm_uscensus2000(self):
        """Test abydos.phonetic.bmpm (US Census 2000 set).

        Randomly samples rows of ``uscensus2000.bm.csv`` and checks, for
        each sampled row, that ``bmpm`` applied to the first field yields
        the expected values stored in fields 1 through 6.
        """
        # The sampled comparison is only performed when ALLOW_RANDOM is set.
        if not ALLOW_RANDOM:
            return

        # (match_mode, name_mode) pairs in the order of the expected-value
        # fields 1..6 of each row.
        mode_columns = (('approx', 'gen'), ('approx', 'ash'),
                        ('approx', 'sep'), ('exact', 'gen'),
                        ('exact', 'ash'), ('exact', 'sep'))

        with open(TESTDIR + '/corpora/uscensus2000.bm.csv') as uscensus_ts:
            # Discard the first line (presumably a header row).
            next(uscensus_ts)
            for raw_line in uscensus_ts:
                fields = raw_line.strip().split(',')
                # This test set is very large (~150000 entries), so only
                # about 20 randomly chosen rows are actually checked.
                if fields[0] == '#' or not one_in(7500):
                    continue
                surname = fields[0]
                for col, (m_mode, n_mode) in enumerate(mode_columns, start=1):
                    self.assertEqual(bmpm(surname, match_mode=m_mode,
                                          name_mode=n_mode), fields[col])
5056
5057
    def test_bmpm_uscensus2000_cc(self):
5058
        """Test abydos.phonetic.bmpm (US Census 2000 set, corner cases)."""
@@ 5057-5077 (lines=21) @@
5054
                    self.assertEqual(bmpm(cen_line[0], match_mode='exact',
5055
                                          name_mode='sep'), cen_line[6])
5056
5057
    def test_bmpm_uscensus2000_cc(self):
        """Test abydos.phonetic.bmpm (US Census 2000 set, corner cases).

        Randomly samples rows of ``uscensus2000.bm.cc.csv`` and checks, for
        each sampled row, that ``bmpm`` applied to the first field yields
        the expected values stored in fields 1 through 6.
        """
        # (match_mode, name_mode) pairs in the order of the expected-value
        # fields 1..6 of each row.
        mode_columns = (('approx', 'gen'), ('approx', 'ash'),
                        ('approx', 'sep'), ('exact', 'gen'),
                        ('exact', 'ash'), ('exact', 'sep'))

        with open(TESTDIR + '/corpora/uscensus2000.bm.cc.csv') as uscensus_ts:
            # Discard the first line (presumably a header row).
            next(uscensus_ts)
            for raw_line in uscensus_ts:
                fields = raw_line.strip().split(',')
                # Only a random subset of rows is checked.
                # NOTE(review): the previous comment here ("very large,
                # ~150000 entries ... select about 20") was identical to the
                # one in test_bmpm_uscensus2000, which samples with
                # one_in(7500); with one_in(10) those figures presumably do
                # not describe this file -- confirm and update.
                if fields[0] == '#' or not one_in(10):
                    continue
                surname = fields[0]
                for col, (m_mode, n_mode) in enumerate(mode_columns, start=1):
                    self.assertEqual(bmpm(surname, match_mode=m_mode,
                                          name_mode=n_mode), fields[col])
5078
5079
    def test_bm_phonetic_number(self):
5080
        """Test abydos.bm._bm_phonetic_number."""