|
@@ 5217-5239 (lines=23) @@
|
| 5214 |
|
nn_line[1]) |
| 5215 |
|
self.assertEqual(bmpm(nn_line[0]), nn_line[2]) |
| 5216 |
|
|
| 5217 |
|
def test_bmpm_uscensus2000(self):
    """Test abydos.phonetic.bmpm (US Census 2000 set).

    For each sampled row of the corpus, column 0 is passed to bmpm and
    the result for six match_mode/name_mode combinations is compared
    against columns 1-6 of that row.
    """
    if not ALLOW_RANDOM:
        return

    # (match_mode, name_mode) pairs, listed in the order of columns 1-6
    mode_pairs = (('approx', 'gen'), ('approx', 'ash'), ('approx', 'sep'),
                  ('exact', 'gen'), ('exact', 'ash'), ('exact', 'sep'))

    with open(TESTDIR + '/corpora/uscensus2000.bm.csv') as uscensus_ts:
        # Skip the first line of the file (presumably a header row)
        next(uscensus_ts)
        for raw_row in uscensus_ts:
            fields = raw_row.strip().split(',')
            # This test set is very large (~150000 entries), so only
            # about 20 randomly selected rows are actually checked
            if fields[0] == '#' or not one_in(7500):
                continue
            for column, (match, name) in enumerate(mode_pairs, 1):
                self.assertEqual(bmpm(fields[0], match_mode=match,
                                      name_mode=name), fields[column])
| 5240 |
|
|
| 5241 |
|
def test_bmpm_uscensus2000_cc(self): |
| 5242 |
|
"""Test abydos.phonetic.bmpm (US Census 2000 set, corner cases).""" |
|
@@ 5241-5261 (lines=21) @@
|
| 5238 |
|
self.assertEqual(bmpm(cen_line[0], match_mode='exact', |
| 5239 |
|
name_mode='sep'), cen_line[6]) |
| 5240 |
|
|
| 5241 |
|
def test_bmpm_uscensus2000_cc(self):
    """Test abydos.phonetic.bmpm (US Census 2000 set, corner cases).

    For each sampled row of the corner-case corpus, column 0 is passed
    to bmpm and the result for six match_mode/name_mode combinations is
    compared against columns 1-6 of that row.
    """
    # Expected-value column -> (match_mode, name_mode) used to produce it
    expectations = (
        (1, 'approx', 'gen'),
        (2, 'approx', 'ash'),
        (3, 'approx', 'sep'),
        (4, 'exact', 'gen'),
        (5, 'exact', 'ash'),
        (6, 'exact', 'sep'),
    )

    with open(TESTDIR + '/corpora/uscensus2000.bm.cc.csv') as uscensus_ts:
        # Skip the first line of the file (presumably a header row)
        next(uscensus_ts)
        for raw_row in uscensus_ts:
            fields = raw_row.strip().split(',')
            # Only rows selected by one_in(10) are checked.
            # NOTE(review): the previous comment here spoke of ~150000
            # entries and about 20 samples, which looks copied from
            # test_bmpm_uscensus2000 -- confirm the size of this corpus.
            if fields[0] == '#' or not one_in(10):
                continue
            for column, match, name in expectations:
                self.assertEqual(bmpm(fields[0], match_mode=match,
                                      name_mode=name), fields[column])
| 5262 |
|
|
| 5263 |
|
def test_bm_phonetic_number(self): |
| 5264 |
|
"""Test abydos.bm._bm_phonetic_number.""" |