Completed
Branch master (6c7f40)
by Chris
01:15
created

namebot.tests.RemoveBadWordsTestCase.test_stem_words()   A

Complexity

Conditions 2

Size

Total Lines 8

Duplication

Lines 0
Ratio 0 %
Metric Value
cc 2
dl 0
loc 8
rs 9.4285
1
import unittest
2
from namebot import normalization as norm
3
4
5
class RemoveOddWordTestCase(unittest.TestCase):
    """Tests for ``norm.remove_odd_sounding_words``."""

    def test_remove_odd_sounding_words(self):
        """Expect exactly two of the four sample words to be dropped."""
        words = ['bking', 'aaaeee', 'flower', 'rabbit']
        # The expected size is derived from the length taken before the call.
        expected_count = len(words) - 2
        remaining = norm.remove_odd_sounding_words(words)
        self.assertEqual(len(remaining), expected_count)

    def test_no_remove_odd_sounding_words(self):
        """Expect ordinary words to leave the item count unchanged."""
        words = ['flower', 'rabbit']
        count_before = len(words)
        remaining = norm.remove_odd_sounding_words(words)
        self.assertEqual(len(remaining), count_before)

    def test_none_remove_odd_sounding_words(self):
        """Expect a ``None`` input to come back equal to ``None``."""
        nothing = None
        self.assertEqual(norm.remove_odd_sounding_words(nothing), nothing)

    def test_empty_remove_odd_sounding_words(self):
        """Expect an empty list to stay empty and equal the returned value."""
        empty = []
        result = norm.remove_odd_sounding_words(empty)
        self.assertEqual(len(empty), 0)
        self.assertEqual(empty, result)
35
36
37
class StemWordsTestCase(unittest.TestCase):
    """Tests for ``norm.stem_words``."""

    def test_stem_words(self):
        """Expect 'running' and 'jumping' to be reduced to 'run' and 'jump'."""
        result = norm.stem_words(['running', 'jumping'])
        self.assertEqual(['run', 'jump'], result)

    def test_no_stem_words(self):
        """Expect words that are already roots to come back unchanged."""
        roots = ['run', 'jump']
        result = norm.stem_words(roots)
        self.assertEqual(roots, result)
49
50
51
class RemoveBadWordsTestCase(unittest.TestCase):
    """Tests for ``norm.remove_bad_words``."""

    def test_remove_bad_words(self):
        """Expect every listed bad word to be absent from the cleaned result.

        The method name matches the function under test so that a failure
        report points at ``remove_bad_words`` rather than at stemming.
        """
        bad_words = ['fuck', 'pussy', 'cunt']
        words = bad_words + ['cool', 'neat', 'rad']
        cleaned = norm.remove_bad_words(words)
        self.assertNotEqual(bad_words, cleaned)
        for bad_word in bad_words:
            # assertNotIn reports the offending word and the container
            # on failure, unlike a bare assertFalse.
            self.assertNotIn(bad_word, cleaned)
60
61
62
class RemoveStopWordsTestCase(unittest.TestCase):
    """Tests for ``norm.remove_stop_words``."""

    def test_remove_stop_words(self):
        """Expect a list made up only of stop words to be filtered to nothing.

        The method name matches the function under test so that a failure
        report points at ``remove_stop_words`` rather than at word-length
        filtering.
        """
        stop_words = ['the', 'is', 'are', 'am', 'but']
        filtered = norm.remove_stop_words(stop_words)
        self.assertEqual(len(filtered), 0)
68
69
70
class FilterWordsTestCase(unittest.TestCase):
    """Tests for ``norm.filter_words``."""

    def test_filter_long_words(self):
        """Expect both very long sample words to be missing afterwards."""
        too_long = ['areallyverylongword', 'anextrareallyverylongword']
        kept = norm.filter_words(too_long + ['normal', 'words'])
        for candidate in too_long:
            self.assertNotIn(candidate, kept)
78
79
80
class UniquifyTestCase(unittest.TestCase):
    """Tests for ``norm.uniquify``."""

    def test_uniquify(self):
        """Expect five words holding two distinct values to yield two items."""
        repeated = ['cool', 'neat', 'cool', 'cool', 'neat']
        self.assertEqual(len(norm.uniquify(repeated)), 2)
85
86
87
class CleanSortTestCase(unittest.TestCase):
    """Tests for ``norm.clean_sort``."""

    def test_clean_sort(self):
        """Expect the noisy samples to come back as 'foobar', 'baz', 'bam'."""
        noisy = ['!@foobar!#', 'ba3z!@#33_', 'bam!333____#33']
        expected = ['foobar', 'baz', 'bam']
        self.assertEqual(norm.clean_sort(noisy), expected)

    def test_clean_string(self):
        """Expect a plain string argument to be returned as-is."""
        raw = '!@foobar!#'
        self.assertEqual(norm.clean_sort(raw), raw)
97
98
99
class ChopDuplicateEndsTestCase(unittest.TestCase):
    """Tests for ``norm.chop_duplicate_ends``."""

    def test_basic(self):
        """Expect a doubled letter at both ends to be collapsed."""
        self.assertEqual(
            norm.chop_duplicate_ends('aabracadabraa'), 'abracadabra')

    def test_one_end(self):
        """Expect a doubled letter at only one end to be collapsed."""
        leading = norm.chop_duplicate_ends('aabracadabra')
        trailing = norm.chop_duplicate_ends('abracadabraa')
        self.assertEqual(leading, 'abracadabra')
        self.assertEqual(trailing, 'abracadabra')
110
111
112
class KeyWordsByPosTagTestCase(unittest.TestCase):
    """Tests for ``norm.key_words_by_pos_tag``."""

    def test_basic(self):
        """Expect (word, tag) pairs to be grouped into lists keyed by tag."""
        tagged = [
            ('Monkey', 'NNP'),
            ('Fly', 'VBG'),
            ('Fly', 'RB'),
            ('Dog', 'NNP'),
            ('Cat', 'NNP'),
        ]
        grouped = norm.key_words_by_pos_tag(tagged)
        self.assertEqual(dict(grouped), {
            'NNP': ['Monkey', 'Dog', 'Cat'],
            'VBG': ['Fly'],
            'RB': ['Fly'],
        })
125