Completed
Push — master ( d242d8...16b8b8 )
by Gus
01:27
created

ProcessorsAPITests.test_annotate()   A

Complexity

Conditions 1

Size

Total Lines 10

Duplication

Lines 0
Ratio 0 %

Importance

Changes 2
Bugs 0 Features 0
Metric Value
cc 1
c 2
b 0
f 0
dl 0
loc 10
rs 9.4285
1
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import unittest
from processors import *


# Port the processors-server should listen on.
port = 8886
# initialize the server
# NOTE(review): module-level side effect — importing this test module
# starts (or attaches to) processors-server.jar with a 180 s timeout;
# every test below talks to this shared instance.
API = ProcessorsAPI(port=port, timeout=180, keep_alive=True)
class ProcessorsAPITests(unittest.TestCase):
    """Integration tests for the module-level ``API`` client.

    These tests exercise the live processors-server started at import
    time, so they require the server to be reachable on ``port``.
    """

    @classmethod
    def tearDownClass(cls):
        """Stop processors-server.jar once, after the whole suite.

        This was previously a ``test_shutdown`` test method, but unittest
        runs test methods in alphabetical order, so the server was shut
        down *before* ``test_unicode`` executed, causing it to fail.
        """
        assert API.stop_server(), "Failed to shut down processors-server.jar"

    def test_api(self):
        "ProcessorsAPI instance should remember its port"

        self.assertEqual(API.port, port, "Port was not {}".format(port))

    # annotate tests
    def test_annotate(self):
        "API.annotate should produce a Document when given text"

        text = "This is sentence 1.  This is sentence 2."
        # .annotate should be successful
        doc = API.annotate(text)
        self.assertIsNotNone(doc, ".annotate failed to produce a Document")
        # should have two sentences
        num_sentences = 2
        self.assertEqual(len(doc.sentences), num_sentences, ".annotate did not produce a Document with {} Sentences for text \"{}\"".format(num_sentences, text))

    def test_unicode(self):
        "API.annotate should produce a Document when given text containing unicode"
        # the server will do a poor job with non-English text, but it should still produce something...
        text = "頑張らなきゃならい"
        doc = API.annotate(text)
        self.assertIsNotNone(doc, ".annotate failed to produce a Document")

    # annotate_from_sentences tests
    def test_annotate_from_sentences(self):
        "API.annotate_from_sentences should produce a Document that preserves the provided sentence segmentation"

        sentences = ["This is sentence 1.", "This is sentence 2."]
        # .annotate_from_sentences should be successful
        doc = API.annotate_from_sentences(sentences)
        self.assertIsNotNone(doc, ".annotate_from_sentences failed to produce a Document")
        # the provided segmentation must be preserved
        self.assertEqual(len(doc.sentences), len(sentences), ".annotate_from_sentences did not produce a Document with the correct number of sentences")

    def test_fastnlp(self):
        "API.fastnlp.annotate should produce a Document when given text"

        text = "This is sentence 1.  This is sentence 2."
        # .annotate should be successful
        doc = API.fastnlp.annotate(text)
        self.assertIsNotNone(doc, "fastnlp.annotate failed to produce a Document")
        # should have two sentences
        num_sentences = 2
        self.assertEqual(len(doc.sentences), num_sentences, "fastnlp.annotate did not produce a Document with {} Sentences for text \"{}\"".format(num_sentences, text))

    def test_bionlp(self):
        "API.bionlp.annotate should produce a Document when given text"

        text = "Ras phosphorylated Mek."
        # .annotate should be successful
        doc = API.bionlp.annotate(text)
        self.assertIsNotNone(doc, "bionlp.annotate failed to produce a Document")
        # should have exactly one sentence
        num_sentences = 1
        self.assertEqual(len(doc.sentences), num_sentences, "bionlp.annotate did not produce a Document with {} Sentences for text \"{}\"".format(num_sentences, text))

    # sentiment analysis tests
    def test_sentiment_analysis_of_text(self):
        "API.sentiment.corenlp.score_text should return scores for text"

        scores = API.sentiment.corenlp.score_text("This is a very sad sentence.")
        self.assertGreater(len(scores), 0, "there were no sentiment scores returned for the text")

    def test_sentiment_analysis_of_document(self):
        "API.sentiment.corenlp.score_document should return scores for Document"

        text = "This is a terribly sad sentence."
        doc = API.annotate(text)
        scores = API.sentiment.corenlp.score_document(doc)
        self.assertGreater(len(scores), 0, "there were no sentiment scores returned for the Document")

    def test_sentiment_analysis_of_sentence(self):
        "API.sentiment.corenlp.score_sentence should return a score for a Sentence"

        text = "This is a terribly sad sentence."
        doc = API.annotate(text)
        s = doc.sentences[0]
        score = API.sentiment.corenlp.score_sentence(s)
        self.assertIsInstance(score, int, "score for Sentence should be of type int, but was of type {}".format(type(score)))

    # NOTE(review): method name keeps the historical "segemented" typo so
    # existing test selections/IDs continue to work; the docstring is fixed.
    def test_sentiment_analysis_of_segemented_text(self):
        "API.sentiment.corenlp.score_segmented_text should return a score for each sentence it is provided"

        sentences = ["This is a terribly sad sentence.", "I'm pretty happy, though :) !"]
        scores = API.sentiment.corenlp.score_segmented_text(sentences)
        self.assertEqual(len(scores), len(sentences), "there should be {} scores, but only {} were produced :(".format(len(sentences), len(scores)))

    def test_sentiment_analysis_score_method(self):
        "API.sentiment.corenlp.score should be able to determine the appropriate API endpoint for the given parameter"
        # test with text
        text = "This is a terribly sad sentence."
        scores = API.sentiment.corenlp.score(text)
        self.assertGreater(len(scores), 0, "there were no sentiment scores returned for the text")
        # test with Document
        doc = API.annotate(text)
        scores = API.sentiment.corenlp.score(doc)
        self.assertGreater(len(scores), 0, "there were no sentiment scores returned for the Document")
        # test with Sentence
        s = doc.sentences[0]
        score = API.sentiment.corenlp.score(s)
        self.assertIsInstance(score, int, "score for Sentence should be of type int, but was of type {}".format(type(score)))

    # Odin tests
    def test_odin_extract_from_text_method(self):
        "API.odin.extract_from_text should return mentions whenever rules match the text"
        rules = """
        - name: "ner-person"
          label: [Person, PossiblePerson, Entity]
          priority: 1
          type: token
          pattern: |
           [entity="PERSON"]+
           |
           [tag=/^N/]* [tag=/^N/ & outgoing="cop"] [tag=/^N/]*
        """
        text = 'Inigo Montoya should be flagged as a Person.'
        mentions = API.odin.extract_from_text(text, rules)
        # exactly one mention is expected; the old message only described
        # the "more than one" failure mode
        self.assertEqual(len(mentions), 1, "expected exactly 1 mention for text, but found {}".format(len(mentions)))
        m = mentions[0]
        self.assertIsInstance(m, Mention, "m wasn't a Mention")
        self.assertEqual(m.label, "Person", "Label of Mention was not \"Person\"")

    def test_odin_extract_from_text_method2(self):
        "API.odin.extract_from_text should be capable of handling a URL pointing to a yaml (rules) file"
        rules_url = "https://raw.githubusercontent.com/clulab/reach/master/src/main/resources/edu/arizona/sista/demo/open/grammars/rules.yml"
        text = 'Inigo Montoya should be flagged as a Person.'
        mentions = API.odin.extract_from_text(text, rules_url)
        self.assertNotEqual(len(mentions), 0, "No mentions were found")
        m = mentions[0]
        self.assertIsInstance(m, Mention, "m wasn't a Mention")
        person_mentions = [m for m in mentions if m.label == "Person"]
        self.assertEqual(len(person_mentions), 1, "{} \"Person\" Mentions found, but 1 expected.".format(len(person_mentions)))

    def test_odin_extract_from_document_method(self):
        "API.odin.extract_from_document should return mentions whenever rules match the text"
        rules = """
        - name: "ner-person"
          label: [Person, PossiblePerson, Entity]
          priority: 1
          type: token
          pattern: |
           [entity="PERSON"]+
           |
           [tag=/^N/]* [tag=/^N/ & outgoing="cop"] [tag=/^N/]*
        """
        text = 'Inigo Montoya should be flagged as a Person.'
        doc = API.annotate(text)
        mentions = API.odin.extract_from_document(doc, rules)
        # exactly one mention is expected (see test_odin_extract_from_text_method)
        self.assertEqual(len(mentions), 1, "expected exactly 1 mention for text, but found {}".format(len(mentions)))
        m = mentions[0]
        self.assertIsInstance(m, Mention, "m wasn't a Mention")
        self.assertEqual(m.label, "Person", "Label of Mention was not \"Person\"")

    def test_odin_extract_from_document_method2(self):
        "API.odin.extract_from_document should be capable of handling a URL pointing to a yaml (rules) file"
        rules_url = "https://raw.githubusercontent.com/clulab/reach/master/src/main/resources/edu/arizona/sista/demo/open/grammars/rules.yml"
        text = 'Inigo Montoya should be flagged as a Person.'
        doc = API.annotate(text)
        mentions = API.odin.extract_from_document(doc, rules_url)
        self.assertNotEqual(len(mentions), 0, "No mentions were found")
        m = mentions[0]
        self.assertIsInstance(m, Mention, "m wasn't a Mention")
        person_mentions = [m for m in mentions if m.label == "Person"]
        self.assertEqual(len(person_mentions), 1, "{} \"Person\" Mentions found, but 1 expected.".format(len(person_mentions)))
185
# Run the whole suite when executed as a script.
if __name__ == "__main__":
    unittest.main()