|
@@ 5235-5257 (lines=23) @@
|
| 5232 |
|
nn_line[1]) |
| 5233 |
|
self.assertEqual(bmpm(nn_line[0]), nn_line[2]) |
| 5234 |
|
|
| 5235 |
|
def test_bmpm_uscensus2000(self):
    """Test abydos.phonetic.bmpm (US Census 2000 set).

    Opens ``/corpora/uscensus2000.bm.csv`` under TESTDIR, discards its
    first line, and for a random sample of the remaining rows compares
    the bmpm() encoding of column 0 against columns 1-6, one column per
    match_mode/name_mode pairing.

    Returns immediately, without checking anything, when ALLOW_RANDOM is
    falsy.
    """
    if not ALLOW_RANDOM:
        return

    # (match_mode, name_mode, index of the column holding the expected
    # encoding), listed in the order in which the checks are performed.
    expectations = (
        ('approx', 'gen', 1),
        ('approx', 'ash', 2),
        ('approx', 'sep', 3),
        ('exact', 'gen', 4),
        ('exact', 'ash', 5),
        ('exact', 'sep', 6),
    )

    with open(TESTDIR + '/corpora/uscensus2000.bm.csv') as uscensus_ts:
        # Discard the first line (presumably a column header -- confirm
        # against the corpus file).
        next(uscensus_ts)
        for raw_row in uscensus_ts:
            fields = raw_row.strip().split(',')
            # The test set is very large (~150000 entries), so only
            # about 20 randomly chosen rows are checked. Rows whose
            # first field is exactly '#' are never sampled.
            if fields[0] == '#' or not one_in(7500):
                continue
            for match_mode, name_mode, column in expectations:
                self.assertEqual(bmpm(fields[0], match_mode=match_mode,
                                      name_mode=name_mode),
                                 fields[column])
| 5258 |
|
|
| 5259 |
|
def test_bmpm_uscensus2000_cc(self): |
| 5260 |
|
"""Test abydos.phonetic.bmpm (US Census 2000 set, corner cases).""" |
|
@@ 5259-5279 (lines=21) @@
|
| 5256 |
|
self.assertEqual(bmpm(cen_line[0], match_mode='exact', |
| 5257 |
|
name_mode='sep'), cen_line[6]) |
| 5258 |
|
|
| 5259 |
|
def test_bmpm_uscensus2000_cc(self):
    """Test abydos.phonetic.bmpm (US Census 2000 set, corner cases).

    Opens ``/corpora/uscensus2000.bm.cc.csv`` under TESTDIR, discards its
    first line, and for each remaining row selected by ``one_in(10)``
    compares the bmpm() encoding of column 0 against columns 1-6, one
    column per match_mode/name_mode pairing.
    """
    # (match_mode, name_mode, index of the column holding the expected
    # encoding), listed in the order in which the checks are performed.
    expectations = (
        ('approx', 'gen', 1),
        ('approx', 'ash', 2),
        ('approx', 'sep', 3),
        ('exact', 'gen', 4),
        ('exact', 'ash', 5),
        ('exact', 'sep', 6),
    )

    with open(TESTDIR + '/corpora/uscensus2000.bm.cc.csv') as uscensus_ts:
        # Discard the first line (presumably a column header -- confirm
        # against the corpus file).
        next(uscensus_ts)
        for raw_row in uscensus_ts:
            fields = raw_row.strip().split(',')
            # Only rows picked by one_in(10) are checked (presumably
            # about one row in ten); rows whose first field is exactly
            # '#' are never sampled.
            # NOTE(review): the previous comment here quoted "~150000
            # entries" and "about 20" samples, which does not match a
            # 1-in-10 rate -- confirm the real size of this corpus.
            if fields[0] == '#' or not one_in(10):
                continue
            for match_mode, name_mode, column in expectations:
                self.assertEqual(bmpm(fields[0], match_mode=match_mode,
                                      name_mode=name_mode),
                                 fields[column])
| 5280 |
|
|
| 5281 |
|
def test_bm_phonetic_number(self): |
| 5282 |
|
"""Test abydos.bm._bm_phonetic_number.""" |