processors.tests.test_ner   A
last analyzed

Complexity

Total Complexity 4

Size/Duplication

Total Lines 50
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
wmc 4
eloc 26
dl 0
loc 50
rs 10
c 0
b 0
f 0

2 Methods

Rating   Name   Duplication   Size   Complexity  
A NERTests.test_bio_nes() 0 14 2
A NERTests.test_obama_nes() 0 13 2
1
# -*- coding: utf-8 -*-
2
3
import unittest
4
from processors import *
5
import os
6
7
8
# Resolved (via os.path.realpath) path of the directory containing this file;
# the tests below join it with the JSON fixture file names.
__location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))
9
10
'''
Tests for named entity recognition.
IOB notation should be neutralized for bionlp documents.
'''
14
15
class NERTests(unittest.TestCase):
    """Check the ``nes`` dicts of documents loaded from JSON fixtures.

    Each test loads a serialized document stored next to this file and
    compares its document-level ``nes`` dict, and the ``nes`` dict of its
    first sentence, against hand-written gold dicts.
    """

    @staticmethod
    def _load_document(filename):
        """Return the Document deserialized from the fixture `filename`.

        `filename` is resolved relative to the directory of this file
        (``__location__``).
        """
        # Imported explicitly so the tests do not depend on
        # `from processors import *` happening to expose these names.
        import json
        from processors import Document

        json_file = os.path.join(__location__, filename)
        with open(json_file) as jf:
            return Document.load_from_JSON(json.load(jf))

    def test_bio_nes(self):
        """Document- and sentence-level ``nes`` for the bio fixture (IOB entities)."""
        biodoc = self._load_document('serialized_biodoc.json')

        # test .nes for biodoc
        b1_gold_dict = {
            'TissueType': ['ventral nerve cord', 'neuron', 'nervous system'],
            'CellType': ['neurons', 'neurons', 'neurons', 'neurons'],
        }
        self.assertEqual(b1_gold_dict, biodoc.nes, "document-level nes dict for IOB entities was ill-formed")

        # test .nes for sentence
        first_sentence = biodoc.sentences[0]
        s1_gold_dict = {
            'CellType': ['neurons'],
            'TissueType': ['ventral nerve cord'],
        }
        self.assertEqual(s1_gold_dict, first_sentence.nes, "sentence-level nes dict for IOB entities was ill-formed")

    # test non-bio text
    def test_obama_nes(self):
        """Document- and sentence-level ``nes`` for the non-bio (Obama) fixture."""
        doc = self._load_document('serialized_obama.json')

        # test .nes for doc
        d1_gold_dict = {
            'ORDINAL': ['44th', 'first', 'first'],
            'LOCATION': ['US', 'United States', 'United States', 'Honolulu', 'Hawaii', 'Chicago', '13th District', 'United States'],
            'NUMBER': ['1', '2', 'three'],
            'DATE': ['August 4 , 1961', '1992 and 2004', '1997 to 2004', '2000'],
            'ORGANIZATION': ['Columbia University', 'Harvard Law School', 'Harvard Law Review', 'University of Chicago Law School', 'Illinois Senate', 'House of Representatives'],
            'MISC': ['American', 'African American', 'Democratic'],
            'PERSON': ['Barack Hussein Obama II', 'Obama', 'Bobby Rush'],
        }
        self.assertEqual(d1_gold_dict, doc.nes, "document-level nes dict for non-IOB entities was ill-formed")

        # test .nes for sentence
        first_sentence = doc.sentences[0]
        s1_gold_dict = {
            'ORDINAL': ['44th'],
            'DATE': ['August 4 , 1961'],
            'NUMBER': ['1', '2'],
            'LOCATION': ['US', 'United States'],
            'ORGANIZATION': [],
            'MISC': ['American'],
            'PERSON': ['Barack Hussein Obama II'],
        }
        self.assertEqual(s1_gold_dict, first_sentence.nes, "sentence-level nes dict for non-IOB entities was ill-formed")
46
47
48
if __name__ == "__main__":
    # Run the tests in this module when the file is executed as a script.
    unittest.main()
50