|
@@ 5033-5055 (lines=23) @@
|
| 5030 |
|
nn_line[1]) |
| 5031 |
|
self.assertEqual(bmpm(nn_line[0]), nn_line[2]) |
| 5032 |
|
|
| 5033 |
|
def test_bmpm_uscensus2000(self):
    """Test abydos.phonetic.bmpm (US Census 2000 set).

    Spot-checks randomly sampled rows of ``uscensus2000.bm.csv``. In each
    row, column 0 is the name passed to ``bmpm`` and columns 1-6 are the
    expected results for the match_mode/name_mode combinations listed in
    ``mode_columns``. The test does nothing unless ``ALLOW_RANDOM`` is
    truthy.
    """
    if not ALLOW_RANDOM:
        return

    # (match_mode, name_mode) pairs, in the order of expected columns 1-6.
    mode_columns = (
        ('approx', 'gen'),
        ('approx', 'ash'),
        ('approx', 'sep'),
        ('exact', 'gen'),
        ('exact', 'ash'),
        ('exact', 'sep'),
    )

    with open(TESTDIR + '/corpora/uscensus2000.bm.csv') as uscensus_ts:
        # Discard the first line (presumably a header row).
        next(uscensus_ts)
        for cen_line in uscensus_ts:
            fields = cen_line.strip().split(',')
            # The row is already split, so fields[0] is the whole first
            # column. Detect comment rows by prefix; comparing the column
            # to '#' would only catch a column that is exactly '#'.
            if fields[0].startswith('#'):
                continue
            # This test set is very large (~150000 entries)
            # so let's just randomly select about 20 for testing
            if not one_in(7500):
                continue
            for column, (match_mode, name_mode) in enumerate(mode_columns,
                                                             1):
                self.assertEqual(bmpm(fields[0], match_mode=match_mode,
                                      name_mode=name_mode),
                                 fields[column])
| 5056 |
|
|
| 5057 |
|
def test_bmpm_uscensus2000_cc(self): |
| 5058 |
|
"""Test abydos.phonetic.bmpm (US Census 2000 set, corner cases).""" |
|
@@ 5057-5077 (lines=21) @@
|
| 5054 |
|
self.assertEqual(bmpm(cen_line[0], match_mode='exact', |
| 5055 |
|
name_mode='sep'), cen_line[6]) |
| 5056 |
|
|
| 5057 |
|
def test_bmpm_uscensus2000_cc(self):
    """Test abydos.phonetic.bmpm (US Census 2000 set, corner cases).

    Spot-checks randomly sampled rows of ``uscensus2000.bm.cc.csv``. In
    each row, column 0 is the name passed to ``bmpm`` and columns 1-6 are
    the expected results for the match_mode/name_mode combinations listed
    in ``mode_columns``.
    """
    # (match_mode, name_mode) pairs, in the order of expected columns 1-6.
    mode_columns = (
        ('approx', 'gen'),
        ('approx', 'ash'),
        ('approx', 'sep'),
        ('exact', 'gen'),
        ('exact', 'ash'),
        ('exact', 'sep'),
    )

    with open(TESTDIR + '/corpora/uscensus2000.bm.cc.csv') as uscensus_ts:
        # Discard the first line (presumably a header row).
        next(uscensus_ts)
        for cen_line in uscensus_ts:
            fields = cen_line.strip().split(',')
            # The row is already split, so fields[0] is the whole first
            # column. Detect comment rows by prefix; comparing the column
            # to '#' would only catch a column that is exactly '#'.
            if fields[0].startswith('#'):
                continue
            # Only a random subset of rows is checked, selected by
            # one_in(10).
            # NOTE(review): an earlier comment here described ~150000
            # entries and about 20 samples, which does not fit a 1-in-10
            # draw -- confirm the size of the corner-case corpus.
            if not one_in(10):
                continue
            for column, (match_mode, name_mode) in enumerate(mode_columns,
                                                             1):
                self.assertEqual(bmpm(fields[0], match_mode=match_mode,
                                      name_mode=name_mode),
                                 fields[column])
| 5078 |
|
|
| 5079 |
|
def test_bm_phonetic_number(self): |
| 5080 |
|
"""Test abydos.bm._bm_phonetic_number.""" |