Completed
Push — master ( 64c530...5ed582 )
by Gus
9s
created

NERTests.test_bio_nes()   A

Complexity

Conditions 2

Size

Total Lines 14

Duplication

Lines 0
Ratio 0 %

Importance

Changes 2
Bugs 0 Features 0
Metric Value
cc 2
c 2
b 0
f 0
dl 0
loc 14
rs 9.4285
1
#!/usr/bin/env python
2
# -*- coding: utf-8 -*-
3
4
import unittest
5
from processors import *
6
import os
7
8
9
# Resolved directory of this test module (joined onto the current working
# directory in case __file__ is relative); the tests below build their JSON
# fixture paths from it.
__location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))
10
11
'''
Tests for named entity recognition (the `.nes` dictionaries).
For bionlp documents, IOB notation should be neutralized in the entity labels.
'''
15
16
class NERTests(unittest.TestCase):
    """Check the `.nes` (named entity) dictionaries of deserialized documents.

    Each test loads a serialized document from a JSON fixture located next to
    this module and compares the document-level and first-sentence-level
    `.nes` dictionaries against hand-written gold dictionaries.
    """

    @staticmethod
    def _load_document(filename):
        """Return the Document deserialized from the JSON fixture `filename`.

        `filename` is resolved relative to the directory of this test module.
        """
        # Imported explicitly so the tests do not rely on `json` leaking into
        # this namespace through `from processors import *`.
        import json

        json_file = os.path.join(__location__, filename)
        with open(json_file) as jf:
            return Document.load_from_JSON(json.load(jf))

    def test_bio_nes(self):
        """Entities of a bio document (IOB-tagged) are grouped by label."""
        biodoc = self._load_document('serialized_biodoc.json')

        # Document-level .nes
        b1_gold_dict = {
            'TissueType': ['ventral nerve cord', 'neuron', 'nervous system'],
            'CellType': ['neurons', 'neurons', 'neurons', 'neurons'],
        }
        self.assertEqual(b1_gold_dict, biodoc.nes, "document-level nes dict for IOB entities was ill-formed")

        # Sentence-level .nes (first sentence only)
        s = biodoc.sentences[0]
        s1_gold_dict = {
            'CellType': ['neurons'],
            'TissueType': ['ventral nerve cord'],
        }
        self.assertEqual(s1_gold_dict, s.nes, "sentence-level nes dict for IOB entities was ill-formed")

    def test_obama_nes(self):
        """Entities of a non-bio document (no IOB tags) are grouped by label."""
        doc = self._load_document('serialized_obama.json')

        # Document-level .nes
        d1_gold_dict = {
            'ORDINAL': ['44th', 'first', 'first'],
            'LOCATION': ['US', 'United States', 'United States', 'Honolulu', 'Hawaii', 'Chicago', '13th District', 'United States'],
            'NUMBER': ['1', '2', 'three'],
            'DATE': ['August 4 , 1961', '1992 and 2004', '1997 to 2004', '2000'],
            'ORGANIZATION': ['Columbia University', 'Harvard Law School', 'Harvard Law Review', 'University of Chicago Law School', 'Illinois Senate', 'House of Representatives'],
            'MISC': ['American', 'African American', 'Democratic'],
            'PERSON': ['Barack Hussein Obama II', 'Obama', 'Bobby Rush'],
        }
        self.assertEqual(d1_gold_dict, doc.nes, "document-level nes dict for non-IOB entities was ill-formed")

        # Sentence-level .nes (first sentence only)
        s = doc.sentences[0]
        s1_gold_dict = {
            'ORDINAL': ['44th'],
            'DATE': ['August 4 , 1961'],
            'NUMBER': ['1', '2'],
            'LOCATION': ['US', 'United States'],
            'ORGANIZATION': [],
            'MISC': ['American'],
            'PERSON': ['Barack Hussein Obama II'],
        }
        self.assertEqual(s1_gold_dict, s.nes, "sentence-level nes dict for non-IOB entities was ill-formed")
47
48
49
# Run the test suite when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()