Completed
Push — master ( 352884...922d0d )
by Gus
01:03
created

Mention.to_JSON_dict()   B

Complexity

Conditions 4

Size

Total Lines 23

Duplication

Lines 0
Ratio 0 %

Importance

Changes 3
Bugs 0 Features 0
Metric Value
cc 4
dl 0
loc 23
rs 8.7972
c 3
b 0
f 0
1
#!/usr/bin/env python
2
# -*- coding: utf-8 -*-
3
from __future__ import unicode_literals
4
from .utils import post_json
5
from .ds import Document, Interval
6
import re
7
import json
8
9
10
class Mention(object):
    """
    A labeled span of text.  Used to model textual mentions of events, relations, and entities.

    Parameters
    ----------
    token_interval : Interval
        The span of the Mention represented as an Interval.
    sentence : int
        The sentence index that contains the Mention.
    document : Document
        The Document in which the Mention was found.
    foundBy : str
        The Odin IE rule that produced this Mention.
    label : str
        The label most closely associated with this span.  Usually the lowest hyponym of "labels".
    labels: list
        The list of labels associated with this span.  Defaults to `[label]` when empty or omitted.
    trigger: dict or None
        dict of JSON for Mention's trigger (event predicate or word(s) signaling the Mention).
    arguments: dict or None
        dict of JSON for Mention's arguments (role -> list of Mention JSON).
    paths: dict or None
        dict of JSON encoding the syntactic paths linking a Mention's arguments to its trigger (applies to Mentions produced from `type:"dependency"` rules).
    keep: bool
        Flag stored on the Mention and serialized under the "keep" key.  Defaults to True.
    doc_id: str or None
        the id of the document.  When falsy, `hash(document)` is used instead.

    Attributes
    ----------
    tokenInterval: processors.ds.Interval
        An `Interval` encoding the `start` and `end` of the `Mention`.
    start : int
        The token index that starts the `Mention`.
    end : int
        The token index that marks the end of the Mention (exclusive).
    sentenceObj : processors.ds.Sentence
        Pointer to the `Sentence` instance containing the `Mention`.
    text : str
        The words of the `Mention` joined by single spaces.
    characterStartOffset: int
        The index of the character that starts the `Mention`.
    characterEndOffset: int
        The index of the character that ends the `Mention`.
    type: Mention.TBM or Mention.EM or Mention.RM
        The type of the `Mention`.
    id : str or None
        Identifier of the `Mention`; only populated by `load_from_JSON`.

    See Also
    --------

    [`Odin` manual](https://arxiv.org/abs/1509.07513)

    Methods
    -------
    matches(label_pattern)
        Test if the provided pattern, `label_pattern`, matches any element in `Mention.labels`.

    """

    TBM = "TextBoundMention"
    EM = "EventMention"
    RM = "RelationMention"

    def __init__(self,
                token_interval,
                sentence,
                document,
                foundBy,
                label,
                labels=None,
                trigger=None,
                arguments=None,
                paths=None,
                keep=True,
                doc_id=None):

        self.label = label
        self.labels = labels if labels else [self.label]
        self.tokenInterval = token_interval
        self.start = self.tokenInterval.start
        self.end = self.tokenInterval.end
        self.document = document
        self._doc_id = doc_id or hash(self.document)
        self.sentence = sentence
        # nested mentions (trigger, arguments) live in the same document as their parent
        docs = self._to_document_map()
        if trigger:
            # NOTE: doc id is not stored for trigger's json,
            # as it is assumed to be contained in the same document as its parent.
            # Work on a copy so the caller's dict is not mutated.
            trigger_json = dict(trigger)
            trigger_json["document"] = self._doc_id
            self.trigger = Mention.load_from_JSON(trigger_json, docs)
        else:
            self.trigger = None
        # unpack args
        if arguments:
            self.arguments = {
                role: [Mention.load_from_JSON(a, docs) for a in args]
                for (role, args) in arguments.items()
            }
        else:
            self.arguments = None
        self.paths = paths
        self.keep = keep
        self.foundBy = foundBy
        # other
        self.sentenceObj = self.document.sentences[self.sentence]
        self.text = " ".join(self.sentenceObj.words[self.start:self.end])
        # recover offsets
        self.characterStartOffset = self.sentenceObj.startOffsets[self.tokenInterval.start]
        # `end` is exclusive, so the last token of the span sits at `end - 1`
        self.characterEndOffset = self.sentenceObj.endOffsets[self.tokenInterval.end - 1]
        # for later recovery
        self.id = None
        self.type = self._set_type()

    def __eq__(self, other):
        """Two Mentions are equal when they share a class and all instance attributes."""
        if isinstance(other, self.__class__):
            return self.__dict__ == other.__dict__
        else:
            return False

    def __ne__(self, other):
        """Inverse of `__eq__` (needed explicitly under Python 2)."""
        return not self.__eq__(other)

    def __str__(self):
        """Return the text of the Mention."""
        return self.text

    def to_JSON_dict(self):
        """
        Build a JSON-serializable dict describing this Mention.

        Returns
        -------
        dict
            Always contains "id", "type", "label", "labels", "tokenInterval",
            "characterStartOffset", "characterEndOffset", "sentence", "document",
            "keep" and "foundBy".  "trigger", "arguments" and "paths" are only
            included when the corresponding attribute is truthy.
        """
        m = dict()
        m["id"] = self.id
        m["type"] = self.type
        m["label"] = self.label
        m["labels"] = self.labels
        m["tokenInterval"] = self.tokenInterval.to_JSON_dict()
        m["characterStartOffset"] = self.characterStartOffset
        m["characterEndOffset"] = self.characterEndOffset
        m["sentence"] = self.sentence
        m["document"] = self._doc_id
        # do we have a trigger?
        if self.trigger:
            m["trigger"] = self.trigger.to_JSON_dict()
        # do we have arguments?
        if self.arguments:
            m["arguments"] = self._arguments_to_JSON_dict()
        # handle paths (stored as plain JSON, so emitted unchanged)
        if self.paths:
            m["paths"] = self.paths
        m["keep"] = self.keep
        m["foundBy"] = self.foundBy
        return m

    def matches(self, label_pattern):
        """
        Test if the provided pattern, `label_pattern`, matches any element in `Mention.labels`.

        Parameters
        ----------
        label_pattern : str or _sre.SRE_Pattern
            The pattern to match against each element in `Mention.labels`

        Returns
        -------
        bool
            True if `label_pattern` matches any element in `Mention.labels`
        """
        # re.compile accepts both a pattern string and an already compiled
        # pattern (returned as-is), so both documented input types work.
        pattern = re.compile(label_pattern)
        return any(pattern.match(label) for label in self.labels)

    def to_JSON(self):
        """Return the Mention as a JSON string (sorted keys, 4-space indent)."""
        return json.dumps(self.to_JSON_dict(), sort_keys=True, indent=4)

    def _arguments_to_JSON_dict(self):
        """Map each argument role to the list of JSON dicts of its Mentions."""
        return dict((role, [a.to_JSON_dict() for a in args]) for (role, args) in self.arguments.items())

    def _paths_to_JSON_dict(self):
        """
        Map each role in `self.paths` to the result of its value's `to_JSON_dict()`.

        NOTE(review): `to_JSON_dict` emits `self.paths` unchanged and never calls
        this helper; it only works when the stored values expose `to_JSON_dict()`.
        """
        return {role: paths.to_JSON_dict() for (role, paths) in self.paths.items()}

    @staticmethod
    def load_from_JSON(mjson, docs_dict):
        """
        Build a Mention from its JSON representation.

        Parameters
        ----------
        mjson : dict
            JSON for the Mention.  Must contain "document", "labels",
            "tokenInterval", "sentence", "foundBy", "id",
            "characterStartOffset" and "characterEndOffset"; "label",
            "trigger", "arguments", "paths" and "keep" are optional.
        docs_dict : dict
            Mapping from document id to the corresponding Document.

        Returns
        -------
        Mention
            The reconstructed Mention, with `id` and character offsets taken from `mjson`.

        Raises
        ------
        KeyError
            If a required key is missing from `mjson` or the document id is not in `docs_dict`.
        """
        # recover document
        doc_id = mjson["document"]
        doc = docs_dict[doc_id]
        labels = mjson["labels"]
        kwargs = {
            "label": mjson.get("label", labels[0]),
            "labels": labels,
            "token_interval": Interval.load_from_JSON(mjson["tokenInterval"]),
            "sentence": mjson["sentence"],
            "document": doc,
            "doc_id": doc_id,
            "trigger": mjson.get("trigger", None),
            "arguments": mjson.get("arguments", None),
            "paths": mjson.get("paths", None),
            "keep": mjson.get("keep", True),
            "foundBy": mjson["foundBy"]
        }
        m = Mention(**kwargs)
        # set IDs
        m.id = mjson["id"]
        m._doc_id = doc_id
        # set character offsets on the attributes the rest of the class reads,
        # so the serialized values survive a load -> to_JSON_dict round trip
        m.characterStartOffset = mjson["characterStartOffset"]
        m.characterEndOffset = mjson["characterEndOffset"]
        # snake_case aliases kept for backward compatibility with existing callers
        m.character_start_offset = m.characterStartOffset
        m.character_end_offset = m.characterEndOffset
        return m

    def _to_document_map(self):
        """Return a single-entry mapping from this Mention's document id to its Document."""
        return {self._doc_id: self.document}

    def _set_type(self):
        """Derive the Mention type from the presence of a trigger and arguments."""
        # event mention
        if self.trigger is not None:
            return Mention.EM
        # textbound mention
        elif self.arguments is None:
            return Mention.TBM
        # relation mention: arguments but no trigger
        else:
            return Mention.RM
214