|
@@ 6109-6131 (lines=23) @@
|
| 6106 |
|
nn_line[1]) |
| 6107 |
|
self.assertEqual(bmpm(nn_line[0]), nn_line[2]) |
| 6108 |
|
|
| 6109 |
|
def test_bmpm_uscensus2000(self):
    """Test abydos.phonetic.bmpm (US Census 2000 set).

    Does nothing unless ALLOW_RANDOM is truthy. Otherwise a random
    sample of rows from uscensus2000.bm.csv is checked: field 0 is the
    input name and fields 1-6 hold the expected bmpm output for each
    (match_mode, name_mode) combination listed below.
    """
    if not ALLOW_RANDOM:
        return

    # (match_mode, name_mode) in the order of CSV columns 1 through 6.
    mode_pairs = (
        ('approx', 'gen'),
        ('approx', 'ash'),
        ('approx', 'sep'),
        ('exact', 'gen'),
        ('exact', 'ash'),
        ('exact', 'sep'),
    )

    with open(TESTDIR + '/corpora/uscensus2000.bm.csv') as uscensus_ts:
        # Discard the first line of the file (presumably a header row).
        next(uscensus_ts)
        for raw_line in uscensus_ts:
            fields = raw_line.strip().split(',')
            # The corpus is very large (~150000 entries), so only rows
            # selected by one_in(7500) -- about 20 -- are tested. Rows
            # whose first field is exactly '#' are skipped before
            # one_in is consulted.
            if fields[0] == '#' or not one_in(7500):
                continue
            for column, (match, name) in enumerate(mode_pairs, 1):
                self.assertEqual(bmpm(fields[0], match_mode=match,
                                      name_mode=name), fields[column])
| 6132 |
|
|
| 6133 |
|
def test_bmpm_uscensus2000_cc(self): |
| 6134 |
|
"""Test abydos.phonetic.bmpm (US Census 2000 set, corner cases).""" |
|
@@ 6133-6153 (lines=21) @@
|
| 6130 |
|
self.assertEqual(bmpm(cen_line[0], match_mode='exact', |
| 6131 |
|
name_mode='sep'), cen_line[6]) |
| 6132 |
|
|
| 6133 |
|
def test_bmpm_uscensus2000_cc(self):
    """Test abydos.phonetic.bmpm (US Census 2000 set, corner cases).

    Checks a random sample of rows from uscensus2000.bm.cc.csv: field 0
    is the input name and fields 1-6 hold the expected bmpm output for
    each (match_mode, name_mode) combination listed below.
    """
    # (match_mode, name_mode) in the order of CSV columns 1 through 6.
    mode_pairs = (
        ('approx', 'gen'),
        ('approx', 'ash'),
        ('approx', 'sep'),
        ('exact', 'gen'),
        ('exact', 'ash'),
        ('exact', 'sep'),
    )

    with open(TESTDIR + '/corpora/uscensus2000.bm.cc.csv') as uscensus_ts:
        # Discard the first line of the file (presumably a header row).
        next(uscensus_ts)
        for raw_line in uscensus_ts:
            fields = raw_line.strip().split(',')
            # Only rows selected by one_in(10) are tested; rows whose
            # first field is exactly '#' are skipped before one_in is
            # consulted.
            # NOTE(review): no ALLOW_RANDOM guard here even though the
            # sampling goes through one_in -- confirm this is intended.
            if fields[0] == '#' or not one_in(10):
                continue
            for column, (match, name) in enumerate(mode_pairs, 1):
                self.assertEqual(bmpm(fields[0], match_mode=match,
                                      name_mode=name), fields[column])
| 6154 |
|
|
| 6155 |
|
def test_bm_phonetic_number(self): |
| 6156 |
|
"""Test abydos.bm._bm_phonetic_number.""" |