|
@@ 5915-5937 (lines=23) @@
|
| 5912 |
|
nn_line[1]) |
| 5913 |
|
self.assertEqual(bmpm(nn_line[0]), nn_line[2]) |
| 5914 |
|
|
| 5915 |
|
def test_bmpm_uscensus2000(self):
    """Test abydos.phonetic.bmpm (US Census 2000 set).

    Spot-checks randomly sampled rows of uscensus2000.bm.csv against
    bmpm() for every combination of match mode and name mode. The test
    returns immediately unless ALLOW_RANDOM is truthy.
    """
    if not ALLOW_RANDOM:
        return

    # Columns 1-6 of each row hold the expected encodings for these
    # (match_mode, name_mode) pairs, in exactly this order.
    mode_pairs = [(match, name)
                  for match in ('approx', 'exact')
                  for name in ('gen', 'ash', 'sep')]

    with open(TESTDIR + '/corpora/uscensus2000.bm.csv') as corpus:
        next(corpus)  # discard the first line (presumably a header row)
        for raw_row in corpus:
            fields = raw_row.strip().split(',')
            # This test set is very large (~150000 entries), so only
            # about 20 randomly selected rows are checked per run.
            # Rows whose first field is exactly '#' are never sampled.
            if fields[0] == '#' or not one_in(7500):
                continue
            for col, (match, name) in enumerate(mode_pairs, 1):
                self.assertEqual(bmpm(fields[0], match_mode=match,
                                      name_mode=name), fields[col])
| 5938 |
|
|
| 5939 |
|
def test_bmpm_uscensus2000_cc(self): |
| 5940 |
|
"""Test abydos.phonetic.bmpm (US Census 2000 set, corner cases).""" |
|
@@ 5939-5959 (lines=21) @@
|
| 5936 |
|
self.assertEqual(bmpm(cen_line[0], match_mode='exact', |
| 5937 |
|
name_mode='sep'), cen_line[6]) |
| 5938 |
|
|
| 5939 |
|
def test_bmpm_uscensus2000_cc(self):
    """Test abydos.phonetic.bmpm (US Census 2000 set, corner cases).

    Spot-checks randomly sampled rows of uscensus2000.bm.cc.csv against
    bmpm() for every combination of match mode and name mode.
    """
    # Columns 1-6 of each row hold the expected encodings for these
    # (match_mode, name_mode) pairs, in exactly this order.
    mode_pairs = [(match, name)
                  for match in ('approx', 'exact')
                  for name in ('gen', 'ash', 'sep')]

    with open(TESTDIR + '/corpora/uscensus2000.bm.cc.csv') as corpus:
        next(corpus)  # discard the first line (presumably a header row)
        for raw_row in corpus:
            fields = raw_row.strip().split(',')
            # Only a random subset of rows is checked: one_in(10)
            # presumably selects about one row in ten -- confirm against
            # its definition. Rows whose first field is exactly '#' are
            # never sampled.
            if fields[0] == '#' or not one_in(10):
                continue
            for col, (match, name) in enumerate(mode_pairs, 1):
                self.assertEqual(bmpm(fields[0], match_mode=match,
                                      name_mode=name), fields[col])
| 5960 |
|
|
| 5961 |
|
def test_bm_phonetic_number(self): |
| 5962 |
|
"""Test abydos.bm._bm_phonetic_number.""" |