@@ 5103-5125 (lines=23) @@ | ||
5100 | nn_line[1]) |
|
5101 | self.assertEqual(bmpm(nn_line[0]), nn_line[2]) |
|
5102 | ||
def test_bmpm_uscensus2000(self):
    """Test abydos.phonetic.bmpm (US Census 2000 set).

    The test is a no-op unless ALLOW_RANDOM is truthy. Otherwise it reads
    corpora/uscensus2000.bm.csv, skips the first line, and for each sampled
    row compares bmpm() of column 0 against columns 1-6, one column per
    (match_mode, name_mode) combination.
    """
    if not ALLOW_RANDOM:
        return

    # (match_mode, name_mode) for CSV columns 1 through 6, in column order.
    mode_pairs = (
        ('approx', 'gen'),
        ('approx', 'ash'),
        ('approx', 'sep'),
        ('exact', 'gen'),
        ('exact', 'ash'),
        ('exact', 'sep'),
    )

    with open(TESTDIR + '/corpora/uscensus2000.bm.csv') as uscensus_ts:
        # Discard the first line (presumably a column header).
        next(uscensus_ts)
        for raw_row in uscensus_ts:
            fields = raw_row.strip().split(',')
            # This test set is very large (~150000 entries), so only a
            # random sample of about 20 rows is checked. one_in() is only
            # consulted for rows whose first field is not '#'.
            if fields[0] == '#' or not one_in(7500):
                continue
            for column, (match, name) in enumerate(mode_pairs, 1):
                self.assertEqual(
                    bmpm(fields[0], match_mode=match, name_mode=name),
                    fields[column])
|
5126 | ||
5127 | def test_bmpm_uscensus2000_cc(self): |
|
5128 | """Test abydos.phonetic.bmpm (US Census 2000 set, corner cases).""" |
|
@@ 5127-5147 (lines=21) @@ | ||
5124 | self.assertEqual(bmpm(cen_line[0], match_mode='exact', |
|
5125 | name_mode='sep'), cen_line[6]) |
|
5126 | ||
def test_bmpm_uscensus2000_cc(self):
    """Test abydos.phonetic.bmpm (US Census 2000 set, corner cases).

    Reads corpora/uscensus2000.bm.cc.csv, skips the first line, and for
    each sampled row compares bmpm() of column 0 against the expected
    encodings stored in columns 1-6.
    """
    # (expected-value column, match_mode, name_mode)
    expectations = (
        (1, 'approx', 'gen'),
        (2, 'approx', 'ash'),
        (3, 'approx', 'sep'),
        (4, 'exact', 'gen'),
        (5, 'exact', 'ash'),
        (6, 'exact', 'sep'),
    )

    with open(TESTDIR + '/corpora/uscensus2000.bm.cc.csv') as uscensus_ts:
        # Discard the first line (presumably a column header).
        next(uscensus_ts)
        for raw_row in uscensus_ts:
            fields = raw_row.strip().split(',')
            # Rows whose first field is exactly '#' are skipped without
            # calling one_in(); the remaining rows are checked only when
            # one_in(10) is truthy (presumably a random ~1-in-10 sample;
            # confirm against one_in's definition).
            if fields[0] == '#':
                continue
            if not one_in(10):
                continue
            name = fields[0]
            for column, match, mode in expectations:
                self.assertEqual(
                    bmpm(name, match_mode=match, name_mode=mode),
                    fields[column])
|
5148 | ||
5149 | def test_bm_phonetic_number(self): |
|
5150 | """Test abydos.bm._bm_phonetic_number.""" |