@@ 4850-4872 (lines=23) @@ | ||
4847 | nn_line[1]) |
|
4848 | self.assertEqual(bmpm(nn_line[0]), nn_line[2]) |
|
4849 | ||
def test_bmpm_uscensus2000(self):
    """Test abydos.phonetic.bmpm (US Census 2000 set).

    Reads ``corpora/uscensus2000.bm.csv`` under ``TESTDIR`` and, for a
    sample of its rows, checks ``bmpm`` of the first field against the
    expected value stored in columns 1-6, one column per combination of
    ``match_mode`` and ``name_mode``.

    The test does nothing unless ``ALLOW_RANDOM`` is truthy.
    """
    if not ALLOW_RANDOM:
        return

    # (match_mode, name_mode) pairs listed in CSV column order; the first
    # pair is checked against column 1, the last against column 6.
    mode_columns = (
        ('approx', 'gen'),
        ('approx', 'ash'),
        ('approx', 'sep'),
        ('exact', 'gen'),
        ('exact', 'ash'),
        ('exact', 'sep'),
    )

    with open(TESTDIR + '/corpora/uscensus2000.bm.csv') as corpus:
        # Discard the first line (presumably a header row).
        next(corpus)
        for raw_row in corpus:
            fields = raw_row.strip().split(',')
            # The corpus is noted as very large (~150000 entries), so only
            # a sample of rows gated by one_in(7500) is checked (about 20
            # rows, assuming one_in(n) is true roughly once in n calls).
            # Rows whose first field is exactly '#' are never checked and
            # do not consume a one_in() call.
            if fields[0] == '#' or not one_in(7500):
                continue
            name = fields[0]
            for column, (match_mode, name_mode) in enumerate(mode_columns,
                                                             start=1):
                self.assertEqual(bmpm(name, match_mode=match_mode,
                                      name_mode=name_mode),
                                 fields[column])
|
4873 | ||
4874 | def test_bmpm_uscensus2000_cc(self): |
|
4875 | """Test abydos.phonetic.bmpm (US Census 2000 set, corner cases).""" |
|
@@ 4874-4894 (lines=21) @@ | ||
4871 | self.assertEqual(bmpm(cen_line[0], match_mode='exact', |
|
4872 | name_mode='sep'), cen_line[6]) |
|
4873 | ||
def test_bmpm_uscensus2000_cc(self):
    """Test abydos.phonetic.bmpm (US Census 2000 set, corner cases).

    Reads ``corpora/uscensus2000.bm.cc.csv`` under ``TESTDIR`` and, for a
    sample of its rows, checks ``bmpm`` of the first field against the
    expected value stored in columns 1-6, one column per combination of
    ``match_mode`` and ``name_mode``.

    There is no ``ALLOW_RANDOM`` guard in this test, so it runs whenever
    the suite runs; rows are still sampled through ``one_in(10)``.
    """
    # Expected-value column -> keyword arguments passed to bmpm.
    expectations = (
        (1, {'match_mode': 'approx', 'name_mode': 'gen'}),
        (2, {'match_mode': 'approx', 'name_mode': 'ash'}),
        (3, {'match_mode': 'approx', 'name_mode': 'sep'}),
        (4, {'match_mode': 'exact', 'name_mode': 'gen'}),
        (5, {'match_mode': 'exact', 'name_mode': 'ash'}),
        (6, {'match_mode': 'exact', 'name_mode': 'sep'}),
    )

    with open(TESTDIR + '/corpora/uscensus2000.bm.cc.csv') as corpus:
        # Discard the first line (presumably a header row).
        next(corpus)
        for raw_row in corpus:
            fields = raw_row.strip().split(',')
            # Rows whose first field is exactly '#' are never checked and
            # do not consume a one_in() call.
            if fields[0] == '#':
                continue
            # Only rows selected by one_in(10) are checked (presumably
            # about one row in ten).
            if not one_in(10):
                continue
            surname = fields[0]
            for column, modes in expectations:
                self.assertEqual(bmpm(surname, **modes), fields[column])
|
4895 | ||
4896 | def test_bm_phonetic_number(self): |
|
4897 | """Test abydos.bm._bm_phonetic_number.""" |