processors.tests.test_api   A
last analyzed

Complexity

Total Complexity 22

Size/Duplication

Total Lines 243
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
wmc 22
eloc 141
dl 0
loc 243
rs 10
c 0
b 0
f 0

20 Methods

Rating   Name   Duplication   Size   Complexity  
A ProcessorsAPITests.test_sentiment_analysis_score_method() 0 15 1
A ProcessorsAPITests.test_bionlp() 0 12 1
A ProcessorsAPITests.test_unicode() 0 7 1
A ProcessorsAPITests.test_odin_extract_from_text_method2() 0 11 1
A ProcessorsAPITests.test_sentence_equality() 0 10 1
A ProcessorsAPITests.test_odin_extract_from_document_method2() 0 12 1
A ProcessorsAPITests.test_odin_mentions_with_triggers() 0 14 3
A ProcessorsAPITests.test_dependencies_equality() 0 9 1
A ProcessorsAPITests.test_sentiment_analysis_of_text() 0 5 1
A ProcessorsAPITests.test_annotate_from_sentences() 0 9 1
A ProcessorsAPITests.test_api() 0 4 1
A ProcessorsAPITests.test_doc_equality() 0 8 1
A ProcessorsAPITests.test_sentiment_analysis_of_document() 0 7 1
A ProcessorsAPITests.test_shutdown() 0 4 1
A ProcessorsAPITests.test_odin_extract_from_text_method() 0 19 1
A ProcessorsAPITests.test_sentiment_analysis_of_sentence() 0 8 1
A ProcessorsAPITests.test_sentiment_analysis_of_segemented_text() 0 6 1
A ProcessorsAPITests.test_annotate() 0 10 1
A ProcessorsAPITests.test_odin_extract_from_document_method() 0 20 1
A ProcessorsAPITests.test_fastnlp() 0 10 1
1
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from codecs import open
import unittest
from processors import *
# json and os are imported after the wildcard import so that a same-named
# export from `processors` cannot shadow the standard-library modules
import json
import os
7
8
# directory holding this test module; resources under /tests are resolved against it
test_dir = os.path.dirname(__file__)

# port handed to the ProcessorsAPI instance shared by every test below
port = 8886

# initialize the server
API = ProcessorsAPI(
    port=port,
    timeout=180,
    jvm_mem="-Xmx5G",
    hostname="127.0.0.1",
    keep_alive=True
)
0 ignored issues
show
Comprehensibility Best Practice introduced by
The variable ProcessorsAPI does not seem to be defined.
Loading history...
14
15
class ProcessorsAPITests(unittest.TestCase):
    """Tests for the module-level ``API`` (a ``ProcessorsAPI`` instance).

    Every test method talks to the shared ``API`` object created when this
    module is imported; no per-test fixture is set up or torn down.
    """

    # Odin rule grammar shared by the inline-rules extraction tests.
    # The whitespace inside the literal is part of the YAML and must be kept as is.
    _PERSON_RULES = """
        - name: "ner-person"
          label: [Person, PossiblePerson, Entity]
          priority: 1
          type: token
          pattern: |
           [entity="PERSON"]+
           |
           [tag=/^N/]* [tag=/^N/ & outgoing="cop"] [tag=/^N/]*
        """
    # remote yaml (rules) file used by the URL-based extraction tests
    _PERSON_RULES_URL = "https://gist.githubusercontent.com/myedibleenso/6eb94696be6e31c46597759387993baf/raw/b9476eba888567597ff7e8bc2f7aa018561fad6c/py-processors-test.yml"
    # sentence the person rules are applied to
    _PERSON_TEXT = 'Inigo Montoya should be flagged as a Person.'

    def test_api(self):
        """ProcessorsAPI instance should remember its port"""
        self.assertEqual(API.port, port, "Port was not {}".format(port))

    # annotate tests
    def test_annotate(self):
        """API.annotate should produce a Document when given text"""
        text = "This is sentence 1.  This is sentence 2."
        # .annotate should be successful
        doc = API.annotate(text)
        self.assertIsNotNone(doc, ".annotate failed to produce a Document")
        # should have two sentences
        num_sentences = 2
        self.assertEqual(
            len(doc.sentences),
            num_sentences,
            ".annotate did not produce a Document with {} Sentences for text \"{}\"".format(num_sentences, text)
        )

    def test_doc_equality(self):
        """Two calls to API.annotate using the same text should produce equivalent Documents"""
        text = "My name is Inigo Montoya."
        doc1 = API.annotate(text)
        doc2 = API.annotate(text)
        self.assertEqual(doc1, doc2, "two .annotate calls on same text did not produce equivalent Documents")
        # round-trip doc2 through its JSON dump and compare against doc1
        reloaded = Document.load_from_JSON(json.loads(doc2.to_JSON()))
        self.assertEqual(doc1, reloaded, "loading JSON dumped from one Document should produce an equivalent Document")

    def test_sentence_equality(self):
        """Two calls to API.annotate using the same text should produce equivalent Sentences"""
        text = "My name is Inigo Montoya."
        doc1 = API.annotate(text)
        d1s1 = doc1.sentences[0]
        doc2 = API.annotate(text)
        d2s1 = doc2.sentences[0]
        self.assertEqual(d1s1, d2s1, "two .annotate calls on same text did not produce equivalent Sentences")
        # round-trip the first sentence through its own JSON dump
        reloaded = Sentence.load_from_JSON(json.loads(d1s1.to_JSON()))
        self.assertEqual(d1s1, reloaded, "loading JSON dumped from one Sentence should produce an equivalent Sentence")

    def test_dependencies_equality(self):
        """Two calls to API.annotate using the same text should produce equivalent syntactic Dependencies"""
        text = "My name is Inigo Montoya."
        doc1 = API.annotate(text)
        d1s1 = doc1.sentences[0]
        doc2 = API.annotate(text)
        d2s1 = doc2.sentences[0]
        self.assertEqual(
            d1s1.dependencies,
            d2s1.dependencies,
            "two .annotate calls on same text did not produce equivalent Dependencies"
        )

    def test_unicode(self):
        """API.annotate should produce a Document when given text containing unicode"""
        # the server will do a poor job with non-English text, but it should still produce something...
        text = "頑張らなきゃならい"
        doc = API.annotate(text)
        self.assertIsNotNone(doc, ".annotate failed to produce a Document")

    # annotate_from_sentences tests
    def test_annotate_from_sentences(self):
        """API.annotate_from_sentences should produce a Document that preserves the provided sentence segmentation"""
        sentences = ["This is sentence 1.", "This is sentence 2."]
        # .annotate_from_sentences should be successful
        doc = API.annotate_from_sentences(sentences)
        self.assertIsNotNone(doc, ".annotate_from_sentences failed to produce a Document")
        # one Sentence per input sentence
        self.assertEqual(
            len(doc.sentences),
            len(sentences),
            ".annotate_from_sentences did not produce a Document with the correct number of sentences"
        )

    def test_fastnlp(self):
        """API.fastnlp.annotate should produce a Document when given text"""
        text = "This is sentence 1.  This is sentence 2."
        # .annotate should be successful
        doc = API.fastnlp.annotate(text)
        self.assertIsNotNone(doc, "fastnlp.annotate failed to produce a Document")
        # should have two sentences
        num_sentences = 2
        self.assertEqual(
            len(doc.sentences),
            num_sentences,
            "fastnlp.annotate did not produce a Document with {} Sentences for text \"{}\"".format(num_sentences, text)
        )

    def test_bionlp(self):
        """API.bionlp.annotate should produce a Document when given text"""
        text = "Ras phosphorylated Mek."
        # .annotate should be successful
        doc = API.bionlp.annotate(text)
        # once more for fickle travis build
        doc = API.bionlp.annotate(text)
        self.assertIsNotNone(doc, "bionlp.annotate failed to produce a Document")
        # should have exactly one sentence
        num_sentences = 1
        self.assertEqual(
            len(doc.sentences),
            num_sentences,
            "bionlp.annotate did not produce a Document with {} Sentences for text \"{}\"".format(num_sentences, text)
        )

    # sentiment analysis tests
    def test_sentiment_analysis_of_text(self):
        """API.sentiment.corenlp.score_text should return scores for text"""
        scores = API.sentiment.corenlp.score_text("This is a very sad sentence.")
        self.assertGreater(len(scores), 0, "there were no sentiment scores returned for the text")

    def test_sentiment_analysis_of_document(self):
        """API.sentiment.corenlp.score_document should return scores for Document"""
        text = "This is a terribly sad sentence."
        doc = API.annotate(text)
        scores = API.sentiment.corenlp.score_document(doc)
        self.assertGreater(len(scores), 0, "there were no sentiment scores returned for the Document")

    def test_sentiment_analysis_of_sentence(self):
        """API.sentiment.corenlp.score_sentence should return a score for a Sentence"""
        text = "This is a terribly sad sentence."
        doc = API.annotate(text)
        s = doc.sentences[0]
        score = API.sentiment.corenlp.score_sentence(s)
        self.assertIsInstance(
            score,
            int,
            "score for Sentence should be of type int, but was of type {}".format(type(score))
        )

    def test_sentiment_analysis_of_segemented_text(self):
        """API.sentiment.corenlp.score_segmented_text should return a score for each sentence it's provided"""
        # NOTE: the misspelling in the method name is kept so the test id stays stable
        sentences = ["This is a terribly sad sentence.", "I'm pretty happy, though :) !"]
        scores = API.sentiment.corenlp.score_segmented_text(sentences)
        self.assertEqual(
            len(scores),
            len(sentences),
            "there should be {} scores, but only {} were produced :(".format(len(sentences), len(scores))
        )

    def test_sentiment_analysis_score_method(self):
        """API.sentiment.corenlp.score should be able to determine the appropriate API endpoint for the given parameter"""
        # test with text
        text = "This is a terribly sad sentence."
        scores = API.sentiment.corenlp.score(text)
        self.assertGreater(len(scores), 0, "there were no sentiment scores returned for the text")
        # test with Document
        doc = API.annotate(text)
        scores = API.sentiment.corenlp.score(doc)
        self.assertGreater(len(scores), 0, "there were no sentiment scores returned for the Document")
        # test with Sentence
        s = doc.sentences[0]
        score = API.sentiment.corenlp.score(s)
        self.assertIsInstance(
            score,
            int,
            "score for Sentence should be of type int, but was of type {}".format(type(score))
        )

    # Odin tests
    def test_odin_extract_from_text_method(self):
        """API.odin.extract_from_text should return mentions whenever rules match the text"""
        mentions = API.odin.extract_from_text(self._PERSON_TEXT, self._PERSON_RULES)
        # report the actual count: the check also fails when nothing was found
        self.assertEqual(
            len(mentions),
            1,
            "Expected exactly 1 mention for text, but found {}.".format(len(mentions))
        )
        m = mentions[0]
        self.assertIsInstance(m, Mention, "m wasn't a Mention")
        self.assertEqual(m.label, "Person", "Label of Mention was not \"Person\"")

    def test_odin_extract_from_text_method2(self):
        """API.odin.extract_from_text should be capable of handling a URL pointing to a yaml (rules) file"""
        mentions = API.odin.extract_from_text(self._PERSON_TEXT, self._PERSON_RULES_URL)
        self.assertGreater(len(mentions), 0, "No mentions were found")
        m = mentions[0]
        self.assertIsInstance(m, Mention, "m wasn't a Mention")
        # distinct loop variable: in Python 2 a comprehension variable would rebind `m`
        person_mentions = [mention for mention in mentions if mention.label == "Person"]
        self.assertEqual(
            len(person_mentions),
            1,
            "{} \"Person\" Mentions found, but 1 expected.".format(len(person_mentions))
        )

    def test_odin_extract_from_document_method(self):
        """API.odin.extract_from_document should return mentions whenever rules match the text"""
        doc = API.annotate(self._PERSON_TEXT)
        mentions = API.odin.extract_from_document(doc, self._PERSON_RULES)
        # report the actual count: the check also fails when nothing was found
        self.assertEqual(
            len(mentions),
            1,
            "Expected exactly 1 mention for text, but found {}.".format(len(mentions))
        )
        m = mentions[0]
        self.assertIsInstance(m, Mention, "m wasn't a Mention")
        self.assertEqual(m.label, "Person", "Label of Mention was not \"Person\"")

    def test_odin_extract_from_document_method2(self):
        """API.odin.extract_from_document should be capable of handling a URL pointing to a yaml (rules) file"""
        doc = API.annotate(self._PERSON_TEXT)
        mentions = API.odin.extract_from_document(doc, self._PERSON_RULES_URL)
        self.assertGreater(len(mentions), 0, "No mentions were found")
        m = mentions[0]
        self.assertIsInstance(m, Mention, "m wasn't a Mention")
        # distinct loop variable: in Python 2 a comprehension variable would rebind `m`
        person_mentions = [mention for mention in mentions if mention.label == "Person"]
        self.assertEqual(
            len(person_mentions),
            1,
            "{} \"Person\" Mentions found, but 1 expected.".format(len(person_mentions))
        )

    def test_odin_mentions_with_triggers(self):
        """the trigger of a Mention should be a Mention"""
        text_file = os.path.join(test_dir, 'obama.txt')
        rule_file = os.path.join(test_dir, 'example-rules.yml')
        # encoding is passed by keyword so the call does not depend on the
        # positional signature of whichever `open` is in scope
        with open(text_file, 'r', encoding='utf-8') as f:
            text = f.read().strip()
        with open(rule_file, 'r', encoding='utf-8') as f:
            rules = f.read().strip()
        mentions = API.odin.extract_from_text(text, rules)
        self.assertIsNotNone(mentions, "Didn't find any mentions")
        triples = [m for m in mentions if m.label == "Triple"]
        # a list is never None, so check for emptiness before indexing into it
        self.assertGreater(
            len(triples),
            0,
            "Didn't find any mentions with the label \"Triple\" when using {} with {}".format(rule_file, text_file)
        )
        self.assertIsInstance(triples[0].trigger, Mention, "triple[0].trigger was not a Mention")

    def test_shutdown(self):
        """api.stop_server() should stop processors-server.jar"""
        # NOTE(review): unittest's default loader runs test methods in
        # alphabetical order, so test_unicode is scheduled after this test and
        # would hit a stopped server unless the client restarts it -- confirm,
        # or move the shutdown into a module-level teardown.
        self.assertTrue(API.stop_server(), "Failed to shut down processors-server.jar")
240
241
if __name__ == "__main__":
    # executed as a script: let unittest discover and run the tests in this module
    unittest.main()
243