|
@@ 4882-4904 (lines=23) @@
|
| 4879 |
|
nn_line[1]) |
| 4880 |
|
self.assertEqual(bmpm(nn_line[0]), nn_line[2]) |
| 4881 |
|
|
| 4882 |
|
def test_bmpm_uscensus2000(self):
    """Test abydos.phonetic.bmpm (US Census 2000 set).

    Randomly samples rows of corpora/uscensus2000.bm.csv and, for each
    sampled row, compares bmpm's encoding of the first field against
    fields 1-6, which hold the expected values for match_mode 'approx'
    then 'exact', each crossed with name_mode 'gen', 'ash' and 'sep'.

    The test is reported as skipped when ALLOW_RANDOM is false.
    """
    if not ALLOW_RANDOM:
        # Report a skip instead of returning silently, so a disabled
        # randomized test is not counted as a pass.
        self.skipTest('random sampling disabled (ALLOW_RANDOM is false)')
    with open(TESTDIR + '/corpora/uscensus2000.bm.csv') as uscensus_ts:
        # The first line of the file is never tested.
        next(uscensus_ts)
        for cen_line in uscensus_ts:
            cen_line = cen_line.strip().split(',')
            # This test set is very large (~150000 entries)
            # so let's just randomly select about 20 for testing.
            # Rows whose first field is exactly '#' are never sampled.
            if cen_line[0] != '#' and one_in(7500):
                self.assertEqual(bmpm(cen_line[0], match_mode='approx',
                                      name_mode='gen'), cen_line[1])
                self.assertEqual(bmpm(cen_line[0], match_mode='approx',
                                      name_mode='ash'), cen_line[2])
                self.assertEqual(bmpm(cen_line[0], match_mode='approx',
                                      name_mode='sep'), cen_line[3])
                self.assertEqual(bmpm(cen_line[0], match_mode='exact',
                                      name_mode='gen'), cen_line[4])
                self.assertEqual(bmpm(cen_line[0], match_mode='exact',
                                      name_mode='ash'), cen_line[5])
                self.assertEqual(bmpm(cen_line[0], match_mode='exact',
                                      name_mode='sep'), cen_line[6])
| 4905 |
|
|
| 4906 |
|
def test_bmpm_uscensus2000_cc(self): |
| 4907 |
|
"""Test abydos.phonetic.bmpm (US Census 2000 set, corner cases).""" |
|
@@ 4906-4926 (lines=21) @@
|
| 4903 |
|
self.assertEqual(bmpm(cen_line[0], match_mode='exact', |
| 4904 |
|
name_mode='sep'), cen_line[6]) |
| 4905 |
|
|
| 4906 |
|
def test_bmpm_uscensus2000_cc(self):
    """Test abydos.phonetic.bmpm (US Census 2000 set, corner cases).

    For each sampled row of corpora/uscensus2000.bm.cc.csv, the first
    field is encoded with every (match_mode, name_mode) pair listed below
    and compared with the field at the matching position (1-6).
    """
    # Position i (counting from 1) in this table corresponds to field i
    # of a row, which holds the expected encoding for that pair.
    mode_pairs = (
        ('approx', 'gen'),
        ('approx', 'ash'),
        ('approx', 'sep'),
        ('exact', 'gen'),
        ('exact', 'ash'),
        ('exact', 'sep'),
    )
    with open(TESTDIR + '/corpora/uscensus2000.bm.cc.csv') as corpus:
        # The first line of the file is never tested.
        next(corpus)
        for raw_row in corpus:
            fields = raw_row.strip().split(',')
            # Rows whose first field is exactly '#' are never tested; the
            # rest are sampled via one_in(10) (presumably about one row
            # in ten -- confirm against one_in's definition).
            if fields[0] == '#' or not one_in(10):
                continue
            for column, (match, name) in enumerate(mode_pairs, 1):
                self.assertEqual(bmpm(fields[0], match_mode=match,
                                      name_mode=name), fields[column])
| 4927 |
|
|
| 4928 |
|
def test_bm_phonetic_number(self): |
| 4929 |
|
"""Test abydos.bm._bm_phonetic_number.""" |