| 1 |  |  | # -*- coding: utf-8 -*- | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3 |  |  | # use data structures | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4 |  |  | from __future__ import unicode_literals | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5 |  |  | from processors.ds import Document, Sentence, DirectedGraph | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6 |  |  | from processors.utils import post_json | 
            
                                                                                                            
                            
            
                                    
            
            
                | 7 |  |  | import json | 
            
                                                                                                            
                            
            
                                    
            
            
                | 8 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 9 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                class Processor(object):
                    """
                    Base Processor for text annotation (tokenization, sentence splitting,
                    parsing, lemmatization, PoS tagging, named entity recognition, chunking, etc.).

                    Parameters
                    ----------
                    address : str
                        The base address for the API (i.e., everything preceding `/api/..`)

                    Attributes
                    ----------
                    service : str
                        The API endpoint for `annotate` requests.

                    Methods
                    -------
                    annotate(text)
                        Produces an annotated `Document` from the provided text.
                        Returns None (and logs the failure) if annotation fails.
                    annotate_from_sentences(sentences)
                        Produces an annotated `Document` from a [str] of text already split into sentences.
                        Returns None (and logs the failure) if annotation fails.

                    """
                    def __init__(self, address):
                        # The endpoint is derived once from the base address.
                        self.service = "{}/api/annotate".format(address)

                    def _message_to_json_dict(self, msg):
                        """
                        Pass `msg.to_JSON()` to `post_json` for `self.service` and return
                        whatever `post_json` returns, without building a `Document`.
                        """
                        return post_json(self.service, msg.to_JSON())

                    def _annotate_message(self, msg):
                        """
                        Pass `msg.to_JSON()` to `post_json` for `self.service` and build a
                        `Document` from the result via `Document.load_from_JSON`.

                        Exceptions raised by either step propagate to the caller.
                        """
                        annotated_text = post_json(self.service, msg.to_JSON())
                        return Document.load_from_JSON(annotated_text)

                    def _log_failure(self, action):
                        """
                        Log the exception currently being handled, with its traceback.

                        Must be called from inside an `except` block. `action` is a short
                        label naming the public method that failed.
                        """
                        # Function-scope import: keeps this class self-contained without
                        # touching the module-level import block.
                        import logging
                        logging.getLogger(__name__).warning(
                            "%s failed for service %s",
                            action,
                            # getattr so that logging itself can never raise here.
                            getattr(self, "service", None),
                            exc_info=True,
                        )

                    def annotate(self, text):
                        """
                        Annotate text (tokenization, sentence splitting,
                        parsing, lemmatization, PoS tagging, named entity recognition, chunking, etc.)

                        Parameters
                        ----------
                        text : str
                            `text` to be annotated.

                        Returns
                        -------
                        processors.ds.Document or None
                            An annotated Document composed of `sentences`, or None if
                            building the message, the request, or loading the result raised
                            an exception. The exception is logged, not re-raised.
                        """
                        try:
                            # build the request message, then load json and build Sentences and Document
                            msg = Message(text)
                            return self._annotate_message(msg)
                        except Exception:
                            # Best-effort contract: callers get None, but the cause is
                            # recorded instead of being silently discarded.
                            self._log_failure("annotate")
                            return None

                    def annotate_from_sentences(self, sentences):
                        """
                        Annotate text that has already been segmented into `sentences`.

                        Parameters
                        ----------
                        sentences : [str]
                            A list of str representing text already split into sentences.

                        Returns
                        -------
                        processors.ds.Document or None
                            An annotated `Document` composed of `sentences`, or None if
                            building the message, the request, or loading the result raised
                            an exception. The exception is logged, not re-raised.
                        """
                        try:
                            # load json from str iterable and build Sentences and Document
                            msg = SegmentedMessage(sentences)
                            return self._annotate_message(msg)
                        except Exception:
                            # Same best-effort contract as `annotate`.
                            self._log_failure("annotate_from_sentences")
                            return None
            
                                                                                                            
                            
            
                                    
            
            
                | 90 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                class CluProcessor(Processor):

                    """
                    Processor for text annotation based on [`org.clulab.processors.clu.CluProcessor`](https://github.com/clulab/processors/blob/master/main/src/main/scala/org/clulab/processors/clu/CluProcessor.scala)

                    Uses the Malt parser.

                    Parameters
                    ----------
                    address : str
                        The base address for the API; `/api/clu/annotate` is appended to
                        form `service`.
                    """
                    def __init__(self, address):
                        # Point `service` at the CluProcessor-specific annotate endpoint.
                        endpoint = "{0}/api/clu/annotate".format(address)
                        self.service = endpoint

                    def annotate(self, text):
                        """Annotate `text` by delegating to the parent class's `annotate`."""
                        parent = super(CluProcessor, self)
                        return parent.annotate(text)
            
                                                                                                            
                            
            
                                    
            
            
                | 103 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 104 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 105 |  |  | class FastNLPProcessor(Processor): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 106 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 107 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 108 |  |  |     Processor for text annotation based on [`org.clulab.processors.fastnlp.FastNLPProcessor`](https://github.com/clulab/processors/blob/master/corenlp/src/main/scala/org/clulab/processors/fastnlp/FastNLPProcessor.scala) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 109 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 110 |  |  |     Uses the Stanford CoreNLP neural network parser. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 111 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 112 |  |  |     def __init__(self, address): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 113 |  |  |         self.address = address | 
            
                                                                                                            
                            
            
                                    
            
            
                | 114 |  |  |         self.service = "{}/api/fastnlp/annotate".format(address) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 115 |  |  |         self.chunk_address = "{}/api/fastnlp/chunk".format(self.address) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 116 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 117 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 118 |  |  |     def annotate(self, text): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 119 |  |  |         return super(FastNLPProcessor, self).annotate(text) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 120 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 121 |  |  |     def _chunk(self, obj): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 122 |  |  |         return post_json(self.chunk_address, obj.to_JSON()) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 123 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 124 |  |  |     def chunk_sentence(self, sentence): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 125 |  |  |         res = self._chunk(sentence) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 126 |  |  |         return Sentence.load_from_JSON(res) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 127 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 128 |  |  |     def chunk_document(self, doc): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 129 |  |  |         res = self._chunk(doc) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 130 |  |  |         return Document.load_from_JSON(res) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 131 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 132 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 133 |  |  | class BioNLPProcessor(Processor): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 134 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 135 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 136 |  |  |     Processor for biomedical text annotation based on [`org.clulab.processors.fastnlp.FastNLPProcessor`](https://github.com/clulab/processors/blob/master/corenlp/src/main/scala/org/clulab/processors/fastnlp/FastNLPProcessor.scala) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 137 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 138 |  |  |     CoreNLP-derived annotator. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 139 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 140 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 141 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 142 |  |  |     def __init__(self, address): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 143 |  |  |         self.service = "{}/api/bionlp/annotate".format(address) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 144 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 145 |  |  |     def annotate(self, text): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 146 |  |  |         return super(BioNLPProcessor, self).annotate(text) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 147 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 148 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 149 |  |  | class Message(object): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 150 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 151 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 152 |  |  |     A storage class for passing `text` to API `annotate` endpoint. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 153 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 154 |  |  |     Attributes | 
            
                                                                                                            
                            
            
                                    
            
            
                | 155 |  |  |     ---------- | 
            
                                                                                                            
                            
            
                                    
            
            
                | 156 |  |  |     text : str | 
            
                                                                                                            
                            
            
                                    
            
            
                | 157 |  |  |         The `text` to be annotated. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 158 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 159 |  |  |     Methods | 
            
                                                                                                            
                            
            
                                    
            
            
                | 160 |  |  |     ------- | 
            
                                                                                                            
                            
            
                                    
            
            
                | 161 |  |  |     to_JSON() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 162 |  |  |         Produces a json str in the structure expected by the API `annotate` endpoint. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 163 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 164 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 165 |  |  |     def __init__(self, text): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 166 |  |  |         self.text = text | 
            
                                                                                                            
                            
            
                                    
            
            
                | 167 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 168 |  |  |     def to_JSON_dict(self): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 169 |  |  |         jdict = dict() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 170 |  |  |         jdict["text"] = self.text | 
            
                                                                                                            
                            
            
                                    
            
            
                | 171 |  |  |         return jdict | 
            
                                                                                                            
                            
            
                                    
            
            
                | 172 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 173 |  |  |     def to_JSON(self): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 174 |  |  |         return json.dumps(self.to_JSON_dict(), sort_keys=True, indent=4) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 175 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 176 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 177 |  |  | class SegmentedMessage(object): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 178 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 179 |  |  |     A storage class for passing text already split into sentences to API `annotate` endpoint. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 180 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 181 |  |  |     Attributes | 
            
                                                                                                            
                            
            
                                    
            
            
                | 182 |  |  |     ---------- | 
            
                                                                                                            
                            
            
                                    
            
            
                | 183 |  |  |     segments : [str] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 184 |  |  |         Text to be annotated that has already been split into sentences.  This segmentation is preserved during annotation. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 185 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 186 |  |  |     Methods | 
            
                                                                                                            
                            
            
                                    
            
            
                | 187 |  |  |     ------- | 
            
                                                                                                            
                            
            
                                    
            
            
                | 188 |  |  |     to_JSON() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 189 |  |  |         Produces a json str in the structure expected by the API `annotate` endpoint. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 190 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 191 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 192 |  |  |     def __init__(self, segments): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 193 |  |  |         self.segments = segments | 
            
                                                                                                            
                            
            
                                    
            
            
                | 194 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 195 |  |  |     def to_JSON_dict(self): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 196 |  |  |         jdict = dict() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 197 |  |  |         jdict["segments"] = self.segments | 
            
                                                                                                            
                            
            
                                    
            
            
                | 198 |  |  |         return jdict | 
            
                                                                                                            
                                                                
            
                                    
            
            
                | 199 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 200 |  |  |     def to_JSON(self): | 
            
                                                        
            
                                    
            
            
                | 201 |  |  |         return json.dumps(self.to_JSON_dict(), sort_keys=True, indent=4) | 
            
                                                        
            
                                    
            
            
                | 202 |  |  |  |