@@ 6381-6403 (lines=23) @@ | ||
6378 | nn_line[1]) |
|
6379 | self.assertEqual(bmpm(nn_line[0]), nn_line[2]) |
|
6380 | ||
def test_bmpm_uscensus2000(self):
    """Test abydos.phonetic.bmpm (US Census 2000 set).

    Reads ``corpora/uscensus2000.bm.csv`` under TESTDIR, discards its
    first line, and for a sample of the remaining rows compares bmpm()
    against the expected values stored in columns 1-6 of the row.
    Returns immediately when ALLOW_RANDOM is false.
    """
    if not ALLOW_RANDOM:
        return

    with open(TESTDIR + '/corpora/uscensus2000.bm.csv') as uscensus_ts:
        # The first line is not test data (presumably a header row).
        next(uscensus_ts)
        for raw_row in uscensus_ts:
            fields = raw_row.strip().split(',')
            # The corpus is very large (~150000 entries), so only a
            # sample selected by one_in(7500) is checked (about 20 rows).
            # Rows whose first field is exactly '#' are never checked.
            if fields[0] == '#' or not one_in(7500):
                continue

            # Expected values are laid out as columns 1-3 for 'approx'
            # matching (gen, ash, sep) and columns 4-6 for 'exact'.
            column = 1
            for match_mode in ('approx', 'exact'):
                for name_mode in ('gen', 'ash', 'sep'):
                    self.assertEqual(
                        bmpm(fields[0], match_mode=match_mode,
                             name_mode=name_mode),
                        fields[column])
                    column += 1
|
6404 | ||
6405 | def test_bmpm_uscensus2000_cc(self): |
|
6406 | """Test abydos.phonetic.bmpm (US Census 2000 set, corner cases).""" |
|
@@ 6405-6425 (lines=21) @@ | ||
6402 | self.assertEqual(bmpm(cen_line[0], match_mode='exact', |
|
6403 | name_mode='sep'), cen_line[6]) |
|
6404 | ||
def test_bmpm_uscensus2000_cc(self):
    """Test abydos.phonetic.bmpm (US Census 2000 set, corner cases).

    Reads ``corpora/uscensus2000.bm.cc.csv`` under TESTDIR, discards its
    first line, and for the rows selected by one_in(10) compares bmpm()
    against the expected values stored in columns 1-6 of the row.
    """
    with open(TESTDIR + '/corpora/uscensus2000.bm.cc.csv') as uscensus_ts:
        # The first line is not test data (presumably a header row).
        next(uscensus_ts)
        for raw_row in uscensus_ts:
            fields = raw_row.strip().split(',')
            # Rows whose first field is exactly '#' are never checked;
            # the rest are sampled through one_in(10).
            # NOTE(review): the previous comment here spoke of ~150000
            # entries and about 20 samples, which fits the 1-in-7500
            # sampling of test_bmpm_uscensus2000 rather than this
            # corner-case file -- confirm the intended sample size.
            if fields[0] == '#' or not one_in(10):
                continue

            # Expected values are laid out as columns 1-3 for 'approx'
            # matching (gen, ash, sep) and columns 4-6 for 'exact'.
            column = 1
            for match_mode in ('approx', 'exact'):
                for name_mode in ('gen', 'ash', 'sep'):
                    self.assertEqual(
                        bmpm(fields[0], match_mode=match_mode,
                             name_mode=name_mode),
                        fields[column])
                    column += 1
|
6426 | ||
6427 | def test_bm_phonetic_number(self): |
|
6428 | """Test abydos.bm._bm_phonetic_number.""" |