|
@@ 6381-6403 (lines=23) @@
|
| 6378 |
|
nn_line[1]) |
| 6379 |
|
self.assertEqual(bmpm(nn_line[0]), nn_line[2]) |
| 6380 |
|
|
| 6381 |
|
def test_bmpm_uscensus2000(self):
    """Test abydos.phonetic.bmpm (US Census 2000 set).

    Each sampled row of the corpus holds a name in column 0 followed by
    the expected bmpm encodings in columns 1-6: 'approx' matching with
    name modes 'gen', 'ash', 'sep', then 'exact' matching with the same
    three name modes.
    """
    if not ALLOW_RANDOM:
        # Report the test as skipped instead of silently passing when
        # nothing has been checked.
        self.skipTest('ALLOW_RANDOM is disabled')

    # (match_mode, name_mode) pairs, listed in the order of the expected
    # value columns 1-6 of the corpus file.
    mode_columns = (('approx', 'gen'), ('approx', 'ash'), ('approx', 'sep'),
                    ('exact', 'gen'), ('exact', 'ash'), ('exact', 'sep'))

    with open(TESTDIR + '/corpora/uscensus2000.bm.csv') as uscensus_ts:
        # Discard the first line (presumably a header row).
        next(uscensus_ts)
        for raw_line in uscensus_ts:
            raw_line = raw_line.strip()
            # Recognise blank and comment rows before splitting, so that a
            # row such as '# note' is skipped rather than tested.
            if not raw_line or raw_line.startswith('#'):
                continue
            # This test set is very large (~150000 entries)
            # so let's just randomly select about 20 for testing
            if not one_in(7500):
                continue
            fields = raw_line.split(',')
            for column, (match_mode, name_mode) in enumerate(mode_columns,
                                                             1):
                self.assertEqual(
                    bmpm(fields[0], match_mode=match_mode,
                         name_mode=name_mode),
                    fields[column],
                    'bmpm(%r, match_mode=%r, name_mode=%r) should be %r'
                    % (fields[0], match_mode, name_mode, fields[column]))
| 6404 |
|
|
| 6405 |
|
def test_bmpm_uscensus2000_cc(self): |
| 6406 |
|
"""Test abydos.phonetic.bmpm (US Census 2000 set, corner cases).""" |
|
@@ 6405-6425 (lines=21) @@
|
| 6402 |
|
self.assertEqual(bmpm(cen_line[0], match_mode='exact', |
| 6403 |
|
name_mode='sep'), cen_line[6]) |
| 6404 |
|
|
| 6405 |
|
def test_bmpm_uscensus2000_cc(self):
    """Test abydos.phonetic.bmpm (US Census 2000 set, corner cases).

    Each sampled row of the corpus holds a name in column 0 followed by
    the expected bmpm encodings in columns 1-6: 'approx' matching with
    name modes 'gen', 'ash', 'sep', then 'exact' matching with the same
    three name modes.
    """
    # (match_mode, name_mode) pairs, listed in the order of the expected
    # value columns 1-6 of the corpus file.
    mode_columns = (('approx', 'gen'), ('approx', 'ash'), ('approx', 'sep'),
                    ('exact', 'gen'), ('exact', 'ash'), ('exact', 'sep'))

    with open(TESTDIR + '/corpora/uscensus2000.bm.cc.csv') as uscensus_ts:
        # Discard the first line (presumably a header row).
        next(uscensus_ts)
        for raw_line in uscensus_ts:
            raw_line = raw_line.strip()
            # Recognise blank and comment rows before splitting, so that a
            # row such as '# note' is skipped rather than tested.
            if not raw_line or raw_line.startswith('#'):
                continue
            # Only the rows selected by one_in(10) are checked, not the
            # whole corner-case file.
            if not one_in(10):
                continue
            fields = raw_line.split(',')
            for column, (match_mode, name_mode) in enumerate(mode_columns,
                                                             1):
                self.assertEqual(
                    bmpm(fields[0], match_mode=match_mode,
                         name_mode=name_mode),
                    fields[column],
                    'bmpm(%r, match_mode=%r, name_mode=%r) should be %r'
                    % (fields[0], match_mode, name_mode, fields[column]))
| 6426 |
|
|
| 6427 |
|
def test_bm_phonetic_number(self): |
| 6428 |
|
"""Test abydos.bm._bm_phonetic_number.""" |