@@ 4882-4904 (lines=23) @@ | ||
4879 | nn_line[1]) |
|
4880 | self.assertEqual(bmpm(nn_line[0]), nn_line[2]) |
|
4881 | ||
def test_bmpm_uscensus2000(self):
    """Test abydos.phonetic.bmpm (US Census 2000 set).

    Returns immediately, checking nothing, when ALLOW_RANDOM is falsy.
    Otherwise a random sample of rows from uscensus2000.bm.csv is read;
    for each sampled row, field 0 is passed to bmpm and fields 1-6 are
    the expected results for each (match_mode, name_mode) combination.
    """
    if not ALLOW_RANDOM:
        return

    # (match_mode, name_mode) pairs in the order of CSV fields 1..6.
    mode_pairs = (
        ('approx', 'gen'),
        ('approx', 'ash'),
        ('approx', 'sep'),
        ('exact', 'gen'),
        ('exact', 'ash'),
        ('exact', 'sep'),
    )

    with open(TESTDIR + '/corpora/uscensus2000.bm.csv') as census_file:
        # Discard the first line (presumably a header row).
        next(census_file)
        for raw_row in census_file:
            fields = raw_row.strip().split(',')
            # The test set is very large (~150000 entries), so only
            # about 20 randomly selected rows are checked per run.
            # Rows whose first field is '#' are never checked.
            if fields[0] == '#' or not one_in(7500):
                continue
            word = fields[0]
            for column, (match, name) in enumerate(mode_pairs, 1):
                self.assertEqual(
                    bmpm(word, match_mode=match, name_mode=name),
                    fields[column])
|
4905 | ||
4906 | def test_bmpm_uscensus2000_cc(self): |
|
4907 | """Test abydos.phonetic.bmpm (US Census 2000 set, corner cases).""" |
|
@@ 4906-4926 (lines=21) @@ | ||
4903 | self.assertEqual(bmpm(cen_line[0], match_mode='exact', |
|
4904 | name_mode='sep'), cen_line[6]) |
|
4905 | ||
def test_bmpm_uscensus2000_cc(self):
    """Test abydos.phonetic.bmpm (US Census 2000 set, corner cases).

    A random sample of rows from uscensus2000.bm.cc.csv is read; for
    each sampled row, field 0 is passed to bmpm and fields 1-6 are the
    expected results for each (match_mode, name_mode) combination.
    """
    # (match_mode, name_mode) pairs in the order of CSV fields 1..6.
    mode_pairs = (
        ('approx', 'gen'),
        ('approx', 'ash'),
        ('approx', 'sep'),
        ('exact', 'gen'),
        ('exact', 'ash'),
        ('exact', 'sep'),
    )

    with open(TESTDIR + '/corpora/uscensus2000.bm.cc.csv') as census_file:
        # Discard the first line (presumably a header row).
        next(census_file)
        for raw_row in census_file:
            fields = raw_row.strip().split(',')
            # Only a random subset of rows is checked on each run
            # (one_in(10) decides per row). Rows whose first field is
            # '#' are never checked.
            if fields[0] == '#' or not one_in(10):
                continue
            word = fields[0]
            for column, (match, name) in enumerate(mode_pairs, 1):
                self.assertEqual(
                    bmpm(word, match_mode=match, name_mode=name),
                    fields[column])
|
4927 | ||
4928 | def test_bm_phonetic_number(self): |
|
4929 | """Test abydos.bm._bm_phonetic_number.""" |