Code Duplication    Length = 21-23 lines in 2 locations

tests/test_phonetic.py 2 locations

@@ 6048-6070 (lines=23) @@
6045
                                     nn_line[1])
6046
                    self.assertEqual(bmpm(nn_line[0]), nn_line[2])
6047
6048
    def test_bmpm_uscensus2000(self):
6049
        """Test abydos.phonetic.bmpm (US Census 2000 set)."""
6050
        if not ALLOW_RANDOM:
6051
            return
6052
        with open(TESTDIR + '/corpora/uscensus2000.bm.csv') as uscensus_ts:
6053
            next(uscensus_ts)
6054
            for cen_line in uscensus_ts:
6055
                cen_line = cen_line.strip().split(',')
6056
                # This test set is very large (~150000 entries)
6057
                # so let's just randomly select about 20 for testing
6058
                if cen_line[0] != '#' and one_in(7500):
6059
                    self.assertEqual(bmpm(cen_line[0], match_mode='approx',
6060
                                          name_mode='gen'), cen_line[1])
6061
                    self.assertEqual(bmpm(cen_line[0], match_mode='approx',
6062
                                          name_mode='ash'), cen_line[2])
6063
                    self.assertEqual(bmpm(cen_line[0], match_mode='approx',
6064
                                          name_mode='sep'), cen_line[3])
6065
                    self.assertEqual(bmpm(cen_line[0], match_mode='exact',
6066
                                          name_mode='gen'), cen_line[4])
6067
                    self.assertEqual(bmpm(cen_line[0], match_mode='exact',
6068
                                          name_mode='ash'), cen_line[5])
6069
                    self.assertEqual(bmpm(cen_line[0], match_mode='exact',
6070
                                          name_mode='sep'), cen_line[6])
6071
6072
    def test_bmpm_uscensus2000_cc(self):
6073
        """Test abydos.phonetic.bmpm (US Census 2000 set, corner cases)."""
@@ 6072-6092 (lines=21) @@
6069
                    self.assertEqual(bmpm(cen_line[0], match_mode='exact',
6070
                                          name_mode='sep'), cen_line[6])
6071
6072
    def test_bmpm_uscensus2000_cc(self):
6073
        """Test abydos.phonetic.bmpm (US Census 2000 set, corner cases)."""
6074
        with open(TESTDIR + '/corpora/uscensus2000.bm.cc.csv') as uscensus_ts:
6075
            next(uscensus_ts)
6076
            for cen_line in uscensus_ts:
6077
                cen_line = cen_line.strip().split(',')
6078
                # This test set is very large (~150000 entries)
6079
                # so let's just randomly select about 20 for testing
6080
                if cen_line[0] != '#' and one_in(10):
6081
                    self.assertEqual(bmpm(cen_line[0], match_mode='approx',
6082
                                          name_mode='gen'), cen_line[1])
6083
                    self.assertEqual(bmpm(cen_line[0], match_mode='approx',
6084
                                          name_mode='ash'), cen_line[2])
6085
                    self.assertEqual(bmpm(cen_line[0], match_mode='approx',
6086
                                          name_mode='sep'), cen_line[3])
6087
                    self.assertEqual(bmpm(cen_line[0], match_mode='exact',
6088
                                          name_mode='gen'), cen_line[4])
6089
                    self.assertEqual(bmpm(cen_line[0], match_mode='exact',
6090
                                          name_mode='ash'), cen_line[5])
6091
                    self.assertEqual(bmpm(cen_line[0], match_mode='exact',
6092
                                          name_mode='sep'), cen_line[6])
6093
6094
    def test_bm_phonetic_number(self):
6095
        """Test abydos.bm._bm_phonetic_number."""