| 1 |  |  | #!/usr/bin/env python | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2 |  |  | # -*- coding: utf-8 -*- | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3 |  |  | from __future__ import unicode_literals | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4 |  |  | from .utils import post_json | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5 |  |  | from .ds import Document, Interval | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6 |  |  | import re | 
            
                                                                                                            
                            
            
                                    
            
            
                | 7 |  |  | import json | 
            
                                                                                                            
                            
            
                                    
            
            
                | 8 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 9 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 10 |  |  | class Mention(object): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 11 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 12 |  |  |     A labeled span of text.  Used to model textual mentions of events, relations, and entities. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 13 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 14 |  |  |     Parameters | 
            
                                                                                                            
                            
            
                                    
            
            
                | 15 |  |  |     ---------- | 
            
                                                                                                            
                            
            
                                    
            
            
                | 16 |  |  |     token_interval : Interval | 
            
                                                                                                            
                            
            
                                    
            
            
                | 17 |  |  |         The span of the Mention represented as an Interval. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 18 |  |  |     sentence : int | 
            
                                                                                                            
                            
            
                                    
            
            
                | 19 |  |  |         The sentence index that contains the Mention. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 20 |  |  |     document : Document | 
            
                                                                                                            
                            
            
                                    
            
            
                | 21 |  |  |         The Document in which the Mention was found. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 22 |  |  |     foundBy : str | 
            
                                                                                                            
                            
            
                                    
            
            
                | 23 |  |  |         The Odin IE rule that produced this Mention. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 24 |  |  |     label : str | 
            
                                                                                                            
                            
            
                                    
            
            
                | 25 |  |  |         The label most closely associated with this span.  Usually the lowest hyponym of "labels". | 
            
                                                                                                            
                            
            
                                    
            
            
                | 26 |  |  |     labels: list | 
            
                                                                                                            
                            
            
                                    
            
            
                | 27 |  |  |         The list of labels associated with this span. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 28 |  |  |     trigger: dict or None | 
            
                                                                                                            
                            
            
                                    
            
            
                | 29 |  |  |         dict of JSON for Mention's trigger (event predicate or word(s) signaling the Mention). | 
            
                                                                                                            
                            
            
                                    
            
            
                | 30 |  |  |     arguments: dict or None | 
            
                                                                                                            
                            
            
                                    
            
            
                | 31 |  |  |         dict of JSON for Mention's arguments. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 32 |  |  |     paths: dict or None | 
            
                                                                                                            
                            
            
                                    
            
            
                | 33 |  |  |         dict of JSON encoding the syntactic paths linking a Mention's arguments to its trigger (applies to Mentions produced from `type:"dependency"` rules). | 
            
                                                                                                            
                            
            
                                    
            
            
                | 34 |  |  |     doc_id: str or None | 
            
                                                                                                            
                            
            
                                    
            
            
                | 35 |  |  |         the id of the document | 
            
                                                                                                            
                            
            
                                    
            
            
                | 36 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 37 |  |  |     Attributes | 
            
                                                                                                            
                            
            
                                    
            
            
                | 38 |  |  |     ---------- | 
            
                                                                                                            
                            
            
                                    
            
            
                | 39 |  |  |     tokenInterval: processors.ds.Interval | 
            
                                                                                                            
                            
            
                                    
            
            
                | 40 |  |  |         An `Interval` encoding the `start` and `end` of the `Mention`. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 41 |  |  |     start : int | 
            
                                                                                                            
                            
            
                                    
            
            
                | 42 |  |  |         The token index that starts the `Mention`. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 43 |  |  |     end : int | 
            
                                                                                                            
                            
            
                                    
            
            
                | 44 |  |  |         The token index that marks the end of the Mention (exclusive). | 
            
                                                                                                            
                            
            
                                    
            
            
                | 45 |  |  |     sentenceObj : processors.ds.Sentence | 
            
                                                                                                            
                            
            
                                    
            
            
                | 46 |  |  |         Pointer to the `Sentence` instance containing the `Mention`. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 47 |  |  |     characterStartOffset: int | 
            
                                                                                                            
                            
            
                                    
            
            
                | 48 |  |  |         The index of the character that starts the `Mention`. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 49 |  |  |     characterEndOffset: int | 
            
                                                                                                            
                            
            
                                    
            
            
                | 50 |  |  |         The index of the character that ends the `Mention`. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 51 |  |  |     type: Mention.TBM or Mention.EM or Mention.RM | 
            
                                                                                                            
                            
            
                                    
            
            
                | 52 |  |  |         The type of the `Mention`. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 53 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 54 |  |  |     See Also | 
            
                                                                                                            
                            
            
                                    
            
            
                | 55 |  |  |     -------- | 
            
                                                                                                            
                            
            
                                    
            
            
                | 56 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 57 |  |  |     [`Odin` manual](https://arxiv.org/abs/1509.07513) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 58 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 59 |  |  |     Methods | 
            
                                                                                                            
                            
            
                                    
            
            
                | 60 |  |  |     ------- | 
            
                                                                                                            
                            
            
                                    
            
            
                | 61 |  |  |     matches(label_pattern) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 62 |  |  |         Test if the provided pattern, `label_pattern`, matches any element in `Mention.labels`. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 63 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 64 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 65 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 66 |  |  |     TBM = "TextBoundMention" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 67 |  |  |     EM = "EventMention" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 68 |  |  |     RM = "RelationMention" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 69 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                def __init__(self,
                             token_interval,
                             sentence,
                             document,
                             foundBy,
                             label,
                             labels=None,
                             trigger=None,
                             arguments=None,
                             paths=None,
                             keep=True,
                             doc_id=None):
                    """
                    Initialize a Mention over tokens `[start, end)` of `document.sentences[sentence]`.

                    Parameters
                    ----------
                    token_interval : Interval
                        Supplies `start` and `end` (exclusive) token indices.
                    sentence : int
                        Index into `document.sentences`.
                    document : Document
                        The Document holding the sentence.
                    foundBy : str
                        Stored as-is on `self.foundBy`.
                    label : str
                        Stored on `self.label`; also the fallback content of `self.labels`.
                    labels : list or None
                        Defaults to `[label]` when empty or None.
                    trigger : dict or None
                        JSON for the trigger; loaded via `Mention.load_from_JSON`. Not modified.
                    arguments : dict or None
                        Maps role -> list of Mention JSON; each is loaded via `Mention.load_from_JSON`.
                    paths : dict or None
                        Stored as-is on `self.paths`.
                    keep : bool
                        Stored as-is on `self.keep`.
                    doc_id : str or None
                        Falls back to `hash(document)` when falsy.
                    """
                    self.label = label
                    self.labels = labels if labels else [self.label]
                    self.tokenInterval = token_interval
                    self.start = self.tokenInterval.start
                    self.end = self.tokenInterval.end
                    self.document = document
                    self._doc_id = doc_id or hash(self.document)
                    self.sentence = sentence
                    if trigger:
                        # NOTE: doc id is not stored for trigger's json,
                        # as it is assumed to be contained in the same document as its parent.
                        # Work on a shallow copy so the caller's dict is not mutated.
                        trigger_json = dict(trigger)
                        trigger_json["document"] = self._doc_id
                        self.trigger = Mention.load_from_JSON(trigger_json, self._to_document_map())
                    else:
                        self.trigger = None
                    # unpack args
                    if arguments:
                        self.arguments = {
                            role: [Mention.load_from_JSON(a, self._to_document_map()) for a in args]
                            for (role, args) in arguments.items()
                        }
                    else:
                        self.arguments = None
                    self.paths = paths
                    self.keep = keep
                    self.foundBy = foundBy
                    # other
                    self.sentenceObj = self.document.sentences[self.sentence]
                    self.text = " ".join(self.sentenceObj.words[self.start:self.end])
                    # recover offsets
                    self.characterStartOffset = self.sentenceObj.startOffsets[self.start]
                    # `end` is exclusive, so the last token of the mention is at `end - 1`;
                    # indexing with `end` would read the following token (or raise IndexError
                    # when the mention ends the sentence).
                    self.characterEndOffset = self.sentenceObj.endOffsets[self.end - 1]
                    # for later recovery
                    self.id = None
                    self.type = self._set_type()
            
                                                                                                            
                            
            
                                    
            
            
                | 112 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 113 |  |  |     def __eq__(self, other): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 114 |  |  |         if isinstance(other, self.__class__): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 115 |  |  |             return self.__dict__ == other.__dict__ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 116 |  |  |         else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 117 |  |  |             return False | 
            
                                                                                                            
                            
            
                                    
            
            
                | 118 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 119 |  |  |     def __ne__(self, other): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 120 |  |  |         return not self.__eq__(other) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 121 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 122 |  |  |     def __str__(self): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 123 |  |  |         return self.text | 
            
                                                                                                            
                                                                
            
                                    
            
            
                | 124 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                def to_JSON_dict(self):
                    """
                    Build a dictionary representation of this mention.

                    The keys "id", "type", "label", "labels", "tokenInterval",
                    "characterStartOffset", "characterEndOffset", "sentence",
                    "document", "keep" and "foundBy" are always present.
                    "trigger", "arguments" and "paths" are only added when the
                    corresponding attribute is truthy.

                    Returns
                    -------
                    dict
                        The dictionary described above.
                    """
                    payload = {
                        "id": self.id,
                        "type": self.type,
                        "label": self.label,
                        "labels": self.labels,
                        "tokenInterval": self.tokenInterval.to_JSON_dict(),
                        "characterStartOffset": self.characterStartOffset,
                        "characterEndOffset": self.characterEndOffset,
                        "sentence": self.sentence,
                        "document": self._doc_id,
                    }
                    # optional parts: emitted only when they hold a truthy value
                    if self.trigger:
                        payload["trigger"] = self.trigger.to_JSON_dict()
                    if self.arguments:
                        payload["arguments"] = self._arguments_to_JSON_dict()
                    if self.paths:
                        # paths are stored as-is, without further conversion
                        payload["paths"] = self.paths
                    payload["keep"] = self.keep
                    payload["foundBy"] = self.foundBy
                    return payload
            
                                                                                                            
                            
            
                                    
            
            
                | 148 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                def matches(self, label_pattern):
                    """
                    Test if the provided pattern, `label_pattern`, matches any element in `Mention.labels`.

                    Matching uses `re.match`, so the pattern is anchored at the start
                    of each label.

                    Parameters
                    ----------
                    label_pattern : str or compiled regular expression
                        The pattern to match against each element in `Mention.labels`

                    Returns
                    -------
                    bool
                        True if `label_pattern` matches any element in `Mention.labels`
                    """
                    # re.match accepts both pattern strings and compiled patterns;
                    # calling `.match` on the argument directly fails for a plain str
                    return any(re.match(label_pattern, label) is not None for label in self.labels)
            
                                                                                                            
                            
            
                                    
            
            
                | 164 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                def to_JSON(self):
                    """
                    Serialize the result of `to_JSON_dict()` to a JSON string.

                    Returns
                    -------
                    str
                        JSON text with keys sorted and an indent of 4 spaces.
                    """
                    as_dict = self.to_JSON_dict()
                    return json.dumps(as_dict, indent=4, sort_keys=True)
            
                                                                                                            
                            
            
                                    
            
            
                | 167 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 168 |  |  |     def _arguments_to_JSON_dict(self): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 169 |  |  |         return dict((role, [a.to_JSON_dict() for a in args]) for (role, args) in self.arguments.items()) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 170 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 171 |  |  |     def _paths_to_JSON_dict(self): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 172 |  |  |         return {role: paths.to_JSON_dict() for (role, paths) in self.paths} | 
            
                                                                                                            
                            
            
                                    
            
            
                | 173 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                @staticmethod
                def load_from_JSON(mjson, docs_dict):
                    """
                    Rebuild a `Mention` from its JSON dictionary form.

                    Parameters
                    ----------
                    mjson : dict
                        The serialized mention. The keys "document", "labels",
                        "tokenInterval", "sentence", "foundBy", "id",
                        "characterStartOffset" and "characterEndOffset" are required.
                        "label" (defaults to the first element of "labels"),
                        "trigger", "arguments", "paths" (default None) and
                        "keep" (defaults to True) are optional.
                    docs_dict : dict
                        Maps document ids to documents; must contain the id stored
                        under `mjson["document"]`.

                    Returns
                    -------
                    Mention
                        The reconstructed mention, with its id, document id and
                        character offsets taken from `mjson`.

                    Raises
                    ------
                    KeyError
                        If a required key is missing from `mjson` or the document id
                        is not present in `docs_dict`.
                    """
                    # recover document
                    doc_id = mjson["document"]
                    doc = docs_dict[doc_id]
                    labels = mjson["labels"]
                    # only fall back to labels[0] when no explicit label was given, so
                    # an empty `labels` list is not an error if "label" is provided
                    label = mjson["label"] if "label" in mjson else labels[0]
                    kwargs = {
                        "label": label,
                        "labels": labels,
                        "token_interval": Interval.load_from_JSON(mjson["tokenInterval"]),
                        "sentence": mjson["sentence"],
                        "document": doc,
                        "doc_id": doc_id,
                        "trigger": mjson.get("trigger", None),
                        "arguments": mjson.get("arguments", None),
                        "paths": mjson.get("paths", None),
                        "keep": mjson.get("keep", True),
                        "foundBy": mjson["foundBy"]
                    }
                    m = Mention(**kwargs)
                    # set IDs
                    m.id = mjson["id"]
                    m._doc_id = doc_id
                    # set character offsets under the attribute names that the
                    # constructor and to_JSON_dict use
                    m.characterStartOffset = mjson["characterStartOffset"]
                    m.characterEndOffset = mjson["characterEndOffset"]
                    # snake_case aliases kept for callers relying on the old names
                    m.character_start_offset = m.characterStartOffset
                    m.character_end_offset = m.characterEndOffset
                    return m
            
                                                                                                            
                            
            
                                    
            
            
                | 201 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 202 |  |  |     def _to_document_map(self): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 203 |  |  |         return {self._doc_id: self.document} | 
            
                                                                                                            
                            
            
                                    
            
            
                | 204 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 205 |  |  |     def _set_type(self): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 206 |  |  |         # event mention | 
            
                                                                                                            
                            
            
                                    
            
            
                | 207 |  |  |         if self.trigger != None: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 208 |  |  |             return Mention.EM | 
            
                                                                                                            
                            
            
                                    
            
            
                | 209 |  |  |         # textbound mention | 
            
                                                                                                            
                            
            
                                    
            
            
                | 210 |  |  |         elif self.trigger == None and self.arguments == None: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 211 |  |  |             return Mention.TBM | 
            
                                                                                                            
                            
            
                                    
            
            
                | 212 |  |  |         else: | 
            
                                                                                                            
                                                                
            
                                    
            
            
                | 213 |  |  |             return Mention.RM | 
            
                                                        
            
                                    
            
            
                | 214 |  |  |  |