|
@@ 6805-6827 (lines=23) @@
|
| 6802 |
|
nn_line[1]) |
| 6803 |
|
self.assertEqual(bmpm(nn_line[0]), nn_line[2]) |
| 6804 |
|
|
| 6805 |
|
def test_bmpm_uscensus2000(self):
    """Test abydos.phonetic.bmpm (US Census 2000 set).

    Returns immediately, without checking anything, when ALLOW_RANDOM
    is falsy. Otherwise each sampled row of the corpus is checked:
    field 0 is the input name and fields 1-6 are the expected bmpm
    outputs for the six (match_mode, name_mode) combinations below.
    """
    if not ALLOW_RANDOM:
        return

    # Listed in the same order as the expected-value columns 1..6.
    mode_columns = (
        ('approx', 'gen'),
        ('approx', 'ash'),
        ('approx', 'sep'),
        ('exact', 'gen'),
        ('exact', 'ash'),
        ('exact', 'sep'),
    )

    with open(TESTDIR + '/corpora/uscensus2000.bm.csv') as corpus:
        # Skip the first line of the file (presumably a header row).
        next(corpus)
        for raw_line in corpus:
            fields = raw_line.strip().split(',')
            # The corpus is noted as very large (~150000 entries), so
            # only about 20 rows are meant to be sampled per run.
            # one_in(7500) is presumably true with probability 1/7500.
            # The '#' check comes first so that one_in is not called
            # for rows whose first field is exactly '#'.
            if fields[0] == '#' or not one_in(7500):
                continue
            name = fields[0]
            for column, (match_mode, name_mode) in enumerate(mode_columns,
                                                             1):
                self.assertEqual(bmpm(name, match_mode=match_mode,
                                      name_mode=name_mode),
                                 fields[column])
| 6828 |
|
|
| 6829 |
|
def test_bmpm_uscensus2000_cc(self): |
| 6830 |
|
"""Test abydos.phonetic.bmpm (US Census 2000 set, corner cases).""" |
|
@@ 6829-6849 (lines=21) @@
|
| 6826 |
|
self.assertEqual(bmpm(cen_line[0], match_mode='exact', |
| 6827 |
|
name_mode='sep'), cen_line[6]) |
| 6828 |
|
|
| 6829 |
|
def test_bmpm_uscensus2000_cc(self):
    """Test abydos.phonetic.bmpm (US Census 2000 set, corner cases).

    Each sampled row of the corner-case corpus is checked: field 0 is
    the input name and fields 1-6 are the expected bmpm outputs for
    the six (match_mode, name_mode) combinations below.
    """
    # Listed in the same order as the expected-value columns 1..6.
    mode_columns = (
        ('approx', 'gen'),
        ('approx', 'ash'),
        ('approx', 'sep'),
        ('exact', 'gen'),
        ('exact', 'ash'),
        ('exact', 'sep'),
    )

    with open(TESTDIR + '/corpora/uscensus2000.bm.cc.csv') as corpus:
        # Skip the first line of the file (presumably a header row).
        next(corpus)
        for raw_line in corpus:
            fields = raw_line.strip().split(',')
            # Only a sample of the rows is checked; one_in(10) is
            # presumably true with probability 1/10.
            # NOTE(review): the previous comment here described a
            # ~150000-entry set sampled down to about 20 rows, which
            # does not match a 1-in-10 rate -- confirm the real size of
            # this corpus. Unlike the full-set test, this method is not
            # gated on ALLOW_RANDOM -- confirm that is intended.
            # The '#' check comes first so that one_in is not called
            # for rows whose first field is exactly '#'.
            if fields[0] == '#' or not one_in(10):
                continue
            name = fields[0]
            for column, (match_mode, name_mode) in enumerate(mode_columns,
                                                             1):
                self.assertEqual(bmpm(name, match_mode=match_mode,
                                      name_mode=name_mode),
                                 fields[column])
| 6850 |
|
|
| 6851 |
|
def test_bm_phonetic_number(self): |
| 6852 |
|
"""Test abydos.bm._bm_phonetic_number.""" |