|
@@ 6788-6810 (lines=23) @@
|
| 6785 |
|
nn_line[1]) |
| 6786 |
|
self.assertEqual(bmpm(nn_line[0]), nn_line[2]) |
| 6787 |
|
|
| 6788 |
|
def test_bmpm_uscensus2000(self):
    """Test abydos.phonetic.bmpm (US Census 2000 set).

    Returns immediately when ALLOW_RANDOM is falsy. Otherwise each sampled
    row of the corpus is encoded with bmpm under every match_mode/name_mode
    pairing and compared with the expected value stored in that row.
    """
    if not ALLOW_RANDOM:
        return

    # Column k (counting from 1) of a row holds the expected encoding for
    # the k-th (match_mode, name_mode) pairing listed here.
    mode_pairs = (('approx', 'gen'), ('approx', 'ash'), ('approx', 'sep'),
                  ('exact', 'gen'), ('exact', 'ash'), ('exact', 'sep'))

    with open(TESTDIR + '/corpora/uscensus2000.bm.csv') as census_file:
        # Discard the first line of the file (presumably a header row).
        next(census_file)
        for row in census_file:
            fields = row.strip().split(',')
            # This test set is very large (~150000 entries), so only
            # about 20 rows are randomly selected for testing. Rows whose
            # first field is exactly '#' are never tested.
            if fields[0] == '#' or not _one_in(7500):
                continue
            for col, (match, name) in enumerate(mode_pairs, 1):
                self.assertEqual(bmpm(fields[0], match_mode=match,
                                      name_mode=name), fields[col])
| 6811 |
|
|
| 6812 |
|
def test_bmpm_uscensus2000_cc(self): |
| 6813 |
|
"""Test abydos.phonetic.bmpm (US Census 2000 set, corner cases).""" |
|
@@ 6812-6832 (lines=21) @@
|
| 6809 |
|
self.assertEqual(bmpm(cen_line[0], match_mode='exact', |
| 6810 |
|
name_mode='sep'), cen_line[6]) |
| 6811 |
|
|
| 6812 |
|
def test_bmpm_uscensus2000_cc(self):
    """Test abydos.phonetic.bmpm (US Census 2000 set, corner cases).

    Each sampled row of the corner-case corpus is encoded with bmpm under
    every match_mode/name_mode pairing and compared with the expected
    value stored in that row.
    """
    # Column k (counting from 1) of a row holds the expected encoding for
    # the k-th (match_mode, name_mode) pairing listed here.
    mode_pairs = (('approx', 'gen'), ('approx', 'ash'), ('approx', 'sep'),
                  ('exact', 'gen'), ('exact', 'ash'), ('exact', 'sep'))

    with open(TESTDIR + '/corpora/uscensus2000.bm.cc.csv') as census_file:
        # Discard the first line of the file (presumably a header row).
        next(census_file)
        for row in census_file:
            fields = row.strip().split(',')
            # Only rows for which _one_in(10) is truthy are tested, and
            # rows whose first field is exactly '#' are never tested.
            # NOTE(review): the previous comment here cited ~150000
            # entries and about 20 samples, which does not fit a 1-in-10
            # selection -- confirm the actual size of this corpus.
            if fields[0] == '#' or not _one_in(10):
                continue
            for col, (match, name) in enumerate(mode_pairs, 1):
                self.assertEqual(bmpm(fields[0], match_mode=match,
                                      name_mode=name), fields[col])
| 6833 |
|
|
| 6834 |
|
def test_bm_phonetic_number(self): |
| 6835 |
|
"""Test abydos.bm._bm_phonetic_number.""" |