Code Duplication    Length = 21-23 lines in 2 locations

tests/test_phonetic.py 2 locations

@@ 6074-6096 (lines=23) @@
6071
                                     nn_line[1])
6072
                    self.assertEqual(bmpm(nn_line[0]), nn_line[2])
6073
6074
    def test_bmpm_uscensus2000(self):
        """Test abydos.phonetic.bmpm (US Census 2000 set).

        Returns immediately, checking nothing, when ALLOW_RANDOM is falsy,
        because only a random sample of the corpus rows is verified.
        """
        if not ALLOW_RANDOM:
            return

        # The i-th pair below (counting from 1) is checked against column i
        # of each sampled row; column 0 is the name passed to bmpm.
        mode_pairs = (
            ('approx', 'gen'),
            ('approx', 'ash'),
            ('approx', 'sep'),
            ('exact', 'gen'),
            ('exact', 'ash'),
            ('exact', 'sep'),
        )

        with open(TESTDIR + '/corpora/uscensus2000.bm.csv') as corpus:
            next(corpus)  # discard the first line (presumably a header row)
            for row in corpus:
                fields = row.strip().split(',')
                # This test set is very large (~150000 entries), so only
                # about 20 randomly chosen rows are checked on each run
                if fields[0] == '#' or not one_in(7500):
                    continue
                for col, (match_mode, name_mode) in enumerate(mode_pairs, 1):
                    self.assertEqual(bmpm(fields[0], match_mode=match_mode,
                                          name_mode=name_mode), fields[col])
6097
6098
    def test_bmpm_uscensus2000_cc(self):
6099
        """Test abydos.phonetic.bmpm (US Census 2000 set, corner cases)."""
@@ 6098-6118 (lines=21) @@
6095
                    self.assertEqual(bmpm(cen_line[0], match_mode='exact',
6096
                                          name_mode='sep'), cen_line[6])
6097
6098
    def test_bmpm_uscensus2000_cc(self):
        """Test abydos.phonetic.bmpm (US Census 2000 set, corner cases).

        Reads the corner-case corpus and, for a random sample of its rows,
        checks bmpm's output for every (match_mode, name_mode) combination
        against the expected value stored in the corresponding column.
        """
        # The i-th pair below (counting from 1) is checked against column i
        # of each sampled row; column 0 is the name passed to bmpm.
        mode_pairs = (
            ('approx', 'gen'),
            ('approx', 'ash'),
            ('approx', 'sep'),
            ('exact', 'gen'),
            ('exact', 'ash'),
            ('exact', 'sep'),
        )

        with open(TESTDIR + '/corpora/uscensus2000.bm.cc.csv') as corpus:
            next(corpus)  # discard the first line (presumably a header row)
            for row in corpus:
                fields = row.strip().split(',')
                # Only a random sample of the rows is checked on each run
                # (one_in(10), presumably about one row in ten).
                # NOTE(review): this sampling is not gated on ALLOW_RANDOM,
                # so the rows exercised differ between runs -- confirm that
                # this is intended.
                if fields[0] == '#' or not one_in(10):
                    continue
                for col, (match_mode, name_mode) in enumerate(mode_pairs, 1):
                    self.assertEqual(bmpm(fields[0], match_mode=match_mode,
                                          name_mode=name_mode), fields[col])
6119
6120
    def test_bm_phonetic_number(self):
6121
        """Test abydos.bm._bm_phonetic_number."""