|
@@ 5895-5917 (lines=23) @@
|
| 5892 |
|
nn_line[1]) |
| 5893 |
|
self.assertEqual(bmpm(nn_line[0]), nn_line[2]) |
| 5894 |
|
|
| 5895 |
|
def test_bmpm_uscensus2000(self):
    """Test abydos.phonetic.bmpm (US Census 2000 set).

    Rows of the corpus are sampled at random; for every sampled row the
    name in column 0 is encoded with each combination of match mode and
    name mode and compared with the expected value stored in the
    corresponding column of the same row.
    """
    # The sampling below is random, so do nothing unless random tests
    # have been switched on.
    if not ALLOW_RANDOM:
        return

    # (match_mode, name_mode, index of the column with the expected value)
    expectations = (
        ('approx', 'gen', 1),
        ('approx', 'ash', 2),
        ('approx', 'sep', 3),
        ('exact', 'gen', 4),
        ('exact', 'ash', 5),
        ('exact', 'sep', 6),
    )

    with open(TESTDIR + '/corpora/uscensus2000.bm.csv') as uscensus_ts:
        # Discard the first line of the file (presumably a header row).
        next(uscensus_ts)
        for raw_line in uscensus_ts:
            fields = raw_line.strip().split(',')
            # This test set is very large (~150000 entries), so only a
            # random handful of rows (about 20) is actually checked.
            # Rows whose first field is exactly '#' are never sampled.
            if fields[0] == '#' or not one_in(7500):
                continue
            for match_mode, name_mode, column in expectations:
                self.assertEqual(
                    bmpm(fields[0], match_mode=match_mode,
                         name_mode=name_mode),
                    fields[column])
| 5918 |
|
|
| 5919 |
|
def test_bmpm_uscensus2000_cc(self): |
| 5920 |
|
"""Test abydos.phonetic.bmpm (US Census 2000 set, corner cases).""" |
|
@@ 5919-5939 (lines=21) @@
|
| 5916 |
|
self.assertEqual(bmpm(cen_line[0], match_mode='exact', |
| 5917 |
|
name_mode='sep'), cen_line[6]) |
| 5918 |
|
|
| 5919 |
|
def test_bmpm_uscensus2000_cc(self):
    """Test abydos.phonetic.bmpm (US Census 2000 set, corner cases).

    Rows of the corner-case corpus are sampled at random; for every
    sampled row the name in column 0 is encoded with each combination of
    match mode and name mode and compared with the expected value stored
    in the corresponding column of the same row.
    """
    # (match_mode, name_mode, index of the column with the expected value)
    expectations = (
        ('approx', 'gen', 1),
        ('approx', 'ash', 2),
        ('approx', 'sep', 3),
        ('exact', 'gen', 4),
        ('exact', 'ash', 5),
        ('exact', 'sep', 6),
    )

    with open(TESTDIR + '/corpora/uscensus2000.bm.cc.csv') as uscensus_ts:
        # Discard the first line of the file (presumably a header row).
        next(uscensus_ts)
        for raw_line in uscensus_ts:
            fields = raw_line.strip().split(',')
            # Only a random subset of rows is checked, gated by
            # one_in(10); rows whose first field is exactly '#' are
            # never sampled.
            # NOTE(review): the comment previously here spoke of ~150000
            # entries and about 20 samples, which looks copied from
            # test_bmpm_uscensus2000 -- confirm the real size of this
            # corner-case file.
            if fields[0] == '#' or not one_in(10):
                continue
            for match_mode, name_mode, column in expectations:
                self.assertEqual(
                    bmpm(fields[0], match_mode=match_mode,
                         name_mode=name_mode),
                    fields[column])
| 5940 |
|
|
| 5941 |
|
def test_bm_phonetic_number(self): |
| 5942 |
|
"""Test abydos.bm._bm_phonetic_number.""" |