|
@@ 6048-6070 (lines=23) @@
|
| 6045 |
|
nn_line[1]) |
| 6046 |
|
self.assertEqual(bmpm(nn_line[0]), nn_line[2]) |
| 6047 |
|
|
| 6048 |
|
def test_bmpm_uscensus2000(self):
    """Test abydos.phonetic.bmpm (US Census 2000 set).

    Checks bmpm() against the expected encodings stored in
    corpora/uscensus2000.bm.csv for a random sample of rows. The test
    is a no-op when ALLOW_RANDOM is falsy.
    """
    if not ALLOW_RANDOM:
        return

    # (match_mode, name_mode, CSV column holding the expected value),
    # listed in the order in which the assertions are evaluated.
    expectations = (
        ('approx', 'gen', 1),
        ('approx', 'ash', 2),
        ('approx', 'sep', 3),
        ('exact', 'gen', 4),
        ('exact', 'ash', 5),
        ('exact', 'sep', 6),
    )

    with open(TESTDIR + '/corpora/uscensus2000.bm.csv') as uscensus_ts:
        # Discard the first line of the file (presumably a header row).
        next(uscensus_ts)
        for raw_line in uscensus_ts:
            fields = raw_line.strip().split(',')
            # This test set is very large (~150000 entries), so only a
            # random sample of about 20 rows is tested. Rows whose
            # first field is exactly '#' are never sampled.
            if fields[0] == '#' or not one_in(7500):
                continue
            for match_mode, name_mode, column in expectations:
                self.assertEqual(
                    bmpm(fields[0], match_mode=match_mode,
                         name_mode=name_mode),
                    fields[column])
| 6071 |
|
|
| 6072 |
|
def test_bmpm_uscensus2000_cc(self): |
| 6073 |
|
"""Test abydos.phonetic.bmpm (US Census 2000 set, corner cases).""" |
|
@@ 6072-6092 (lines=21) @@
|
| 6069 |
|
self.assertEqual(bmpm(cen_line[0], match_mode='exact', |
| 6070 |
|
name_mode='sep'), cen_line[6]) |
| 6071 |
|
|
| 6072 |
|
def test_bmpm_uscensus2000_cc(self):
    """Test abydos.phonetic.bmpm (US Census 2000 set, corner cases).

    Checks bmpm() against the expected encodings stored in
    corpora/uscensus2000.bm.cc.csv for a random subset of rows.
    """
    # (match_mode, name_mode, CSV column holding the expected value),
    # listed in the order in which the assertions are evaluated.
    expectations = (
        ('approx', 'gen', 1),
        ('approx', 'ash', 2),
        ('approx', 'sep', 3),
        ('exact', 'gen', 4),
        ('exact', 'ash', 5),
        ('exact', 'sep', 6),
    )

    with open(TESTDIR + '/corpora/uscensus2000.bm.cc.csv') as uscensus_ts:
        # Discard the first line of the file (presumably a header row).
        next(uscensus_ts)
        for raw_line in uscensus_ts:
            fields = raw_line.strip().split(',')
            # Only a random subset of rows is tested: a row is checked
            # when its first field is not exactly '#' and one_in(10)
            # is truthy.
            # NOTE(review): the previous comment here spoke of ~150000
            # entries and about 20 samples, which looks copied from
            # test_bmpm_uscensus2000 -- confirm the intended rate.
            if fields[0] == '#' or not one_in(10):
                continue
            for match_mode, name_mode, column in expectations:
                self.assertEqual(
                    bmpm(fields[0], match_mode=match_mode,
                         name_mode=name_mode),
                    fields[column])
| 6093 |
|
|
| 6094 |
|
def test_bm_phonetic_number(self): |
| 6095 |
|
"""Test abydos.bm._bm_phonetic_number.""" |