Code Duplication    Length = 21-23 lines in 2 locations

tests/test_phonetic.py 2 locations

@@ 5235-5257 (lines=23) @@
5232
                                     nn_line[1])
5233
                    self.assertEqual(bmpm(nn_line[0]), nn_line[2])
5234
5235
    def test_bmpm_uscensus2000(self):
        """Test abydos.phonetic.bmpm (US Census 2000 set).

        Does nothing unless ALLOW_RANDOM is truthy. Rows of the corpus are
        sampled with one_in(7500); for every sampled row, field 0 is encoded
        with bmpm under each (match_mode, name_mode) pair and the result is
        compared with fields 1-6, in that order.
        """
        if not ALLOW_RANDOM:
            return
        # (match_mode, name_mode) pairs, ordered to line up with fields 1-6
        mode_pairs = (('approx', 'gen'), ('approx', 'ash'), ('approx', 'sep'),
                      ('exact', 'gen'), ('exact', 'ash'), ('exact', 'sep'))
        with open(TESTDIR + '/corpora/uscensus2000.bm.csv') as census_file:
            next(census_file)  # skip the first line (presumably a header)
            for row in census_file:
                fields = row.strip().split(',')
                # Ignore rows whose first field is '#'; of the remaining
                # rows, test only those selected by one_in(7500)
                if fields[0] == '#' or not one_in(7500):
                    continue
                name = fields[0]
                for column, (match, mode) in enumerate(mode_pairs, 1):
                    self.assertEqual(bmpm(name, match_mode=match,
                                          name_mode=mode), fields[column])
5258
5259
    def test_bmpm_uscensus2000_cc(self):
5260
        """Test abydos.phonetic.bmpm (US Census 2000 set, corner cases)."""
@@ 5259-5279 (lines=21) @@
5256
                    self.assertEqual(bmpm(cen_line[0], match_mode='exact',
5257
                                          name_mode='sep'), cen_line[6])
5258
5259
    def test_bmpm_uscensus2000_cc(self):
        """Test abydos.phonetic.bmpm (US Census 2000 set, corner cases).

        Rows of the corner-case corpus are sampled with one_in(10); for
        every sampled row, field 0 is encoded with bmpm under each
        (match_mode, name_mode) pair and the result is compared with
        fields 1-6, in that order.
        """
        # (match_mode, name_mode) pairs, ordered to line up with fields 1-6
        mode_pairs = (('approx', 'gen'), ('approx', 'ash'), ('approx', 'sep'),
                      ('exact', 'gen'), ('exact', 'ash'), ('exact', 'sep'))
        with open(TESTDIR + '/corpora/uscensus2000.bm.cc.csv') as census_file:
            next(census_file)  # skip the first line (presumably a header)
            for row in census_file:
                fields = row.strip().split(',')
                # Ignore rows whose first field is '#'; of the remaining
                # rows, test only those selected by one_in(10)
                if fields[0] == '#' or not one_in(10):
                    continue
                name = fields[0]
                for column, (match, mode) in enumerate(mode_pairs, 1):
                    self.assertEqual(bmpm(name, match_mode=match,
                                          name_mode=mode), fields[column])
5280
5281
    def test_bm_phonetic_number(self):
5282
        """Test abydos.bm._bm_phonetic_number."""