@@ 6074-6096 (lines=23) @@ | ||
6071 | nn_line[1]) |
|
6072 | self.assertEqual(bmpm(nn_line[0]), nn_line[2]) |
|
6073 | ||
def test_bmpm_uscensus2000(self):
    """Test abydos.phonetic.bmpm (US Census 2000 set).

    For each sampled row of ``uscensus2000.bm.csv``, the name in column 0
    is encoded with every combination of ``match_mode`` ('approx',
    'exact') and ``name_mode`` ('gen', 'ash', 'sep'), and the result is
    compared with columns 1 through 6 of that row, in that order.

    The test is reported as skipped when ``ALLOW_RANDOM`` is false.
    """
    if not ALLOW_RANDOM:
        # Report a skip rather than returning silently, so the test run
        # does not show a pass for a test that checked nothing.
        self.skipTest('random sampling is disabled (ALLOW_RANDOM is false)')

    # (match_mode, name_mode) pairs, listed in the order of the expected
    # value columns 1..6 of the corpus file.
    mode_columns = (
        ('approx', 'gen'),
        ('approx', 'ash'),
        ('approx', 'sep'),
        ('exact', 'gen'),
        ('exact', 'ash'),
        ('exact', 'sep'),
    )

    with open(TESTDIR + '/corpora/uscensus2000.bm.csv') as uscensus_ts:
        # Discard the first line (presumably a header row).
        next(uscensus_ts)
        for cen_line in uscensus_ts:
            cen_line = cen_line.strip().split(',')
            # This test set is very large (~150000 entries)
            # so let's just randomly select about 20 for testing
            if cen_line[0] != '#' and one_in(7500):
                for col, (match_mode, name_mode) in enumerate(mode_columns,
                                                              1):
                    self.assertEqual(bmpm(cen_line[0],
                                          match_mode=match_mode,
                                          name_mode=name_mode),
                                     cen_line[col])
|
6097 | ||
6098 | def test_bmpm_uscensus2000_cc(self): |
|
6099 | """Test abydos.phonetic.bmpm (US Census 2000 set, corner cases).""" |
|
@@ 6098-6118 (lines=21) @@ | ||
6095 | self.assertEqual(bmpm(cen_line[0], match_mode='exact', |
|
6096 | name_mode='sep'), cen_line[6]) |
|
6097 | ||
def test_bmpm_uscensus2000_cc(self):
    """Test abydos.phonetic.bmpm (US Census 2000 set, corner cases).

    Rows of ``uscensus2000.bm.cc.csv`` are sampled with ``one_in(10)``.
    For every selected row, the name in column 0 is encoded in each
    match/name mode combination and compared with columns 1 through 6.
    """
    # Expected-value columns 1..6 correspond to these modes, in order.
    modes = [(match, name)
             for match in ('approx', 'exact')
             for name in ('gen', 'ash', 'sep')]

    with open(TESTDIR + '/corpora/uscensus2000.bm.cc.csv') as corpus:
        # The first line is consumed without being tested.
        next(corpus)
        for row in corpus:
            fields = row.strip().split(',')
            # Rows whose first field is exactly '#' are never tested.
            if fields[0] == '#':
                continue
            # Only a sample of the remaining rows is checked.
            if not one_in(10):
                continue
            for column, (match, name) in enumerate(modes, 1):
                self.assertEqual(
                    bmpm(fields[0], match_mode=match, name_mode=name),
                    fields[column])
|
6119 | ||
6120 | def test_bm_phonetic_number(self): |
|
6121 | """Test abydos.bm._bm_phonetic_number.""" |