|
@@ 541-561 (lines=21) @@
|
| 538 |
|
self.assertEqual(sb_german('über'), 'uber') |
| 539 |
|
self.assertEqual(sb_german('ueber'), 'ueb') |
| 540 |
|
|
| 541 |
|
def test_sb_dutch_snowball(self):
    """Test abydos.stemmer.sb_dutch (Snowball testset).

    Expected stems are read from the bundled Dutch corpus; the reference
    data comes from
    http://snowball.tartarus.org/algorithms/dutch/diffs.txt
    """
    # the empty string must stem to the empty string
    self.assertEqual(sb_dutch(''), '')

    # walk the corpus: each non-comment row is read as "word,stem"
    corpus_path = TESTDIR+'/corpora/snowball_dutch.csv'
    with codecs.open(corpus_path, encoding='utf-8') as corpus:
        next(corpus)  # first line is skipped (presumably a header row)
        for row in corpus:
            if row[0] == '#':
                # rows starting with '#' are not test data
                continue
            fields = row.strip().split(',')
            word, stem = fields[0], fields[1]
            # the expected stem is compared in lower case
            self.assertEqual(sb_dutch(word), stem.lower())

    # extra input aimed at a branch the corpus leaves uncovered
    self.assertEqual(sb_dutch('zondulielijk'), 'zondulie')
| 562 |
|
|
| 563 |
|
def test_sb_norwegian_snowball(self): |
| 564 |
|
"""Test abydos.stemmer.sb_norwegian (Snowball testset). |
|
@@ 468-488 (lines=21) @@
|
| 465 |
|
abydos.stemmer.sb_danish |
| 466 |
|
""" |
| 467 |
|
|
| 468 |
|
def test_sb_german_snowball(self):
    """Test abydos.stemmer.sb_german (Snowball testset).

    Expected stems are read from the bundled German corpus; the reference
    data comes from
    http://snowball.tartarus.org/algorithms/german/diffs.txt
    """
    # the empty string must stem to the empty string
    self.assertEqual(sb_german(''), '')

    # walk the corpus: each non-comment row is read as "word,stem"
    corpus_path = TESTDIR+'/corpora/snowball_german.csv'
    with codecs.open(corpus_path, encoding='utf-8') as corpus:
        next(corpus)  # first line is skipped (presumably a header row)
        for row in corpus:
            if row[0] == '#':
                # rows starting with '#' are not test data
                continue
            fields = row.strip().split(',')
            word, stem = fields[0], fields[1]
            # the expected stem is compared in lower case
            self.assertEqual(sb_german(word), stem.lower())

    # extra input aimed at a branch the corpus leaves uncovered
    self.assertEqual(sb_german('ikeit'), 'ikeit')
| 489 |
|
|
| 490 |
|
def test_sb_german_snowball_alt(self): |
| 491 |
|
"""Test abydos.stemmer.sb_german (alternate vowels).""" |
|
@@ 601-618 (lines=18) @@
|
| 598 |
|
word, stem = line[0], line[1] |
| 599 |
|
self.assertEqual(sb_swedish(word), stem.lower()) |
| 600 |
|
|
| 601 |
|
def test_sb_danish_snowball(self):
    """Test abydos.stemmer.sb_danish (Snowball testset).

    Expected stems are read from the bundled Danish corpus; the reference
    data comes from
    http://snowball.tartarus.org/algorithms/danish/diffs.txt
    """
    # the empty string must stem to the empty string
    self.assertEqual(sb_danish(''), '')

    # walk the corpus: each non-comment row is read as "word,stem"
    corpus_path = TESTDIR+'/corpora/snowball_danish.csv'
    with codecs.open(corpus_path, encoding='utf-8') as corpus:
        next(corpus)  # first line is skipped (presumably a header row)
        for row in corpus:
            if row[0] == '#':
                # rows starting with '#' are not test data
                continue
            fields = row.strip().split(',')
            word, stem = fields[0], fields[1]
            # the expected stem is compared in lower case
            self.assertEqual(sb_danish(word), stem.lower())
| 619 |
|
|
| 620 |
|
|
| 621 |
|
class CLEFTestCases(unittest.TestCase): |
|
@@ 582-599 (lines=18) @@
|
| 579 |
|
word, stem = line[0], line[1] |
| 580 |
|
self.assertEqual(sb_norwegian(word), stem.lower()) |
| 581 |
|
|
| 582 |
|
def test_sb_swedish_snowball(self):
    """Test abydos.stemmer.sb_swedish (Snowball testset).

    Expected stems are read from the bundled Swedish corpus; the reference
    data comes from
    http://snowball.tartarus.org/algorithms/swedish/diffs.txt
    """
    # the empty string must stem to the empty string
    self.assertEqual(sb_swedish(''), '')

    # walk the corpus: each non-comment row is read as "word,stem"
    corpus_path = TESTDIR+'/corpora/snowball_swedish.csv'
    with codecs.open(corpus_path, encoding='utf-8') as corpus:
        next(corpus)  # first line is skipped (presumably a header row)
        for row in corpus:
            if row[0] == '#':
                # rows starting with '#' are not test data
                continue
            fields = row.strip().split(',')
            word, stem = fields[0], fields[1]
            # the expected stem is compared in lower case
            self.assertEqual(sb_swedish(word), stem.lower())
| 600 |
|
|
| 601 |
|
def test_sb_danish_snowball(self): |
| 602 |
|
"""Test abydos.stemmer.sb_danish (Snowball testset). |
|
@@ 563-580 (lines=18) @@
|
| 560 |
|
# missed branch test cases |
| 561 |
|
self.assertEqual(sb_dutch('zondulielijk'), 'zondulie') |
| 562 |
|
|
| 563 |
|
def test_sb_norwegian_snowball(self):
    """Test abydos.stemmer.sb_norwegian (Snowball testset).

    Expected stems are read from the bundled Norwegian corpus; the
    reference data comes from
    http://snowball.tartarus.org/algorithms/norwegian/diffs.txt
    """
    # the empty string must stem to the empty string
    self.assertEqual(sb_norwegian(''), '')

    # walk the corpus: each non-comment row is read as "word,stem"
    corpus_path = TESTDIR+'/corpora/snowball_norwegian.csv'
    with codecs.open(corpus_path, encoding='utf-8') as corpus:
        next(corpus)  # first line is skipped (presumably a header row)
        for row in corpus:
            if row[0] == '#':
                # rows starting with '#' are not test data
                continue
            fields = row.strip().split(',')
            word, stem = fields[0], fields[1]
            # the expected stem is compared in lower case
            self.assertEqual(sb_norwegian(word), stem.lower())
| 581 |
|
|
| 582 |
|
def test_sb_swedish_snowball(self): |
| 583 |
|
"""Test abydos.stemmer.sb_swedish (Snowball testset). |