Mention.load_from_JSON() - Code Metrics - Inspection of "Version 3.0.0" - myedibleenso/py-processors - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( 352884...922d0d )

by Gus

created 2017-03-26 10:12 UTC

Mention.load_from_JSON() B

↳ Parent: Mention

Complexity

Conditions

Size

Total Lines

Duplication

Lines	0
Ratio	0 %

Importance

Changes	2
Bugs	0	Features	0

Metric	Value
cc	1
dl	0
loc	27
rs	8.8571
c	2
b	0
f	0

#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from .utils import post_json
from .ds import Document, Interval
import re
import json


class Mention(object):
    """
    A labeled span of text.  Used to model textual mentions of events, relations, and entities.

    Parameters
    ----------
    token_interval : Interval
        The span of the Mention represented as an Interval.
    sentence : int
        The sentence index that contains the Mention.
    document : Document
        The Document in which the Mention was found.
    foundBy : str
        The Odin IE rule that produced this Mention.
    label : str
        The label most closely associated with this span.  Usually the lowest hyponym of "labels".
    labels: list
        The list of labels associated with this span.  Defaults to `[label]` when empty or None.
    trigger: dict or None
        dict of JSON for Mention's trigger (event predicate or word(s) signaling the Mention).
    arguments: dict or None
        dict of JSON for Mention's arguments.
    paths: dict or None
        dict of JSON encoding the syntactic paths linking a Mention's arguments to its trigger (applies to Mentions produced from `type:"dependency"` rules).
    keep: bool
        Flag stored on the Mention and serialized under the "keep" key.
    doc_id: str or None
        the id of the document.  When not provided, `hash(document)` is used.

    Attributes
    ----------
    tokenInterval: processors.ds.Interval
        An `Interval` encoding the `start` and `end` of the `Mention`.
    start : int
        The token index that starts the `Mention`.
    end : int
        The token index that marks the end of the Mention (exclusive).
    sentenceObj : processors.ds.Sentence
        Pointer to the `Sentence` instance containing the `Mention`.
    characterStartOffset: int
        The index of the character that starts the `Mention`.
    characterEndOffset: int
        The index of the character that ends the `Mention`.
    type: Mention.TBM or Mention.EM or Mention.RM
        The type of the `Mention`.

    See Also
    --------

    [`Odin` manual](https://arxiv.org/abs/1509.07513)

    Methods
    -------
    matches(label_pattern)
        Test if the provided pattern, `label_pattern`, matches any element in `Mention.labels`.

    """

    TBM = "TextBoundMention"
    EM = "EventMention"
    RM = "RelationMention"

    def __init__(self,
                token_interval,
                sentence,
                document,
                foundBy,
                label,
                labels=None,
                trigger=None,
                arguments=None,
                paths=None,
                keep=True,
                doc_id=None):

        self.label = label
        self.labels = labels if labels else [self.label]
        self.tokenInterval = token_interval
        self.start = self.tokenInterval.start
        self.end = self.tokenInterval.end
        self.document = document
        self._doc_id = doc_id or hash(self.document)

        self.sentence = sentence
        if trigger:
            # NOTE: doc id is not stored for trigger's json,
            # as it is assumed to be contained in the same document as its parent.
            # Work on a shallow copy so the caller's dict is not mutated.
            trigger = dict(trigger)
            trigger["document"] = self._doc_id
            self.trigger = Mention.load_from_JSON(trigger, self._to_document_map())
        else:
            self.trigger = None
        # unpack args: each role maps to a list of Mention JSON dicts
        if arguments:
            docs = self._to_document_map()
            self.arguments = {
                role: [Mention.load_from_JSON(a, docs) for a in args]
                for (role, args) in arguments.items()
            }
        else:
            self.arguments = None
        self.paths = paths
        self.keep = keep
        self.foundBy = foundBy
        # other
        self.sentenceObj = self.document.sentences[self.sentence]
        self.text = " ".join(self.sentenceObj.words[self.start:self.end])

        # recover offsets
        self.characterStartOffset = self.sentenceObj.startOffsets[self.start]
        # `end` is exclusive, so the last token of the span sits at `end - 1`.
        # An empty interval (start == end) falls back to the start index
        # rather than wrapping around to a negative index.
        last_token = max(self.end - 1, self.start)
        self.characterEndOffset = self.sentenceObj.endOffsets[last_token]
        # for later recovery
        self.id = None
        self.type = self._set_type()

    def __eq__(self, other):
        """Two Mentions are equal when they share a class and all instance attributes."""
        if isinstance(other, self.__class__):
            return self.__dict__ == other.__dict__
        return False

    def __ne__(self, other):
        # Python 2 does not derive `!=` from `__eq__`, so define it explicitly.
        return not self.__eq__(other)

    def __str__(self):
        return self.text

    def to_JSON_dict(self):
        """
        Build a JSON-serializable dict describing this Mention.

        Returns
        -------
        dict
            The "trigger", "arguments", and "paths" keys are only present
            when the corresponding attribute is non-empty.
        """
        m = dict()
        m["id"] = self.id
        m["type"] = self.type
        m["label"] = self.label
        m["labels"] = self.labels
        m["tokenInterval"] = self.tokenInterval.to_JSON_dict()
        m["characterStartOffset"] = self.characterStartOffset
        m["characterEndOffset"] = self.characterEndOffset
        m["sentence"] = self.sentence
        m["document"] = self._doc_id
        # do we have a trigger?
        if self.trigger:
            m["trigger"] = self.trigger.to_JSON_dict()
        # do we have arguments?
        if self.arguments:
            m["arguments"] = self._arguments_to_JSON_dict()
        # handle paths (stored as raw JSON, so emitted as-is)
        if self.paths:
            m["paths"] = self.paths
        m["keep"] = self.keep
        m["foundBy"] = self.foundBy
        return m

    def matches(self, label_pattern):
        """
        Test if the provided pattern, `label_pattern`, matches any element in `Mention.labels`.

        Parameters
        ----------
        label_pattern : str or _sre.SRE_Pattern
            The pattern to match against each element in `Mention.labels`.
            A plain string is compiled with `re.compile` before matching.

        Returns
        -------
        bool
            True if `label_pattern` matches any element in `Mention.labels`
        """
        # Duck-type on `.match` so compiled patterns pass through untouched and
        # both str/unicode patterns are compiled (works on Python 2 and 3).
        if not hasattr(label_pattern, "match"):
            label_pattern = re.compile(label_pattern)
        return any(label_pattern.match(label) for label in self.labels)

    def to_JSON(self):
        """Serialize this Mention to a pretty-printed JSON string with sorted keys."""
        return json.dumps(self.to_JSON_dict(), sort_keys=True, indent=4)

    def _arguments_to_JSON_dict(self):
        # role -> list of JSON dicts, one per argument Mention
        return {role: [a.to_JSON_dict() for a in args] for (role, args) in self.arguments.items()}

    def _paths_to_JSON_dict(self):
        # Iterate (role, paths) pairs; iterating the dict directly would yield only keys.
        # NOTE(review): assumes each value exposes `to_JSON_dict()`; `paths` passed to
        # `__init__` is documented as raw JSON, so confirm before relying on this helper.
        return {role: paths.to_JSON_dict() for (role, paths) in self.paths.items()}

    @staticmethod
    def load_from_JSON(mjson, docs_dict):
        """
        Build a Mention from its JSON representation.

        Parameters
        ----------
        mjson : dict
            JSON dict for the Mention.  Must contain "document", "labels",
            "tokenInterval", "sentence", "foundBy", "id",
            "characterStartOffset", and "characterEndOffset".
        docs_dict : dict
            Mapping of document id -> Document used to resolve `mjson["document"]`.

        Returns
        -------
        Mention

        Raises
        ------
        KeyError
            If a required key is missing from `mjson` or the document id is
            not present in `docs_dict`.
        """
        # recover document
        doc_id = mjson["document"]
        doc = docs_dict[doc_id]
        labels = mjson["labels"]
        kwargs = {
            "label": mjson.get("label", labels[0]),
            "labels": labels,
            "token_interval": Interval.load_from_JSON(mjson["tokenInterval"]),
            "sentence": mjson["sentence"],
            "document": doc,
            "doc_id": doc_id,
            "trigger": mjson.get("trigger", None),
            "arguments": mjson.get("arguments", None),
            "paths": mjson.get("paths", None),
            "keep": mjson.get("keep", True),
            "foundBy": mjson["foundBy"]
        }
        m = Mention(**kwargs)
        # set IDs
        m.id = mjson["id"]
        m._doc_id = doc_id
        # set character offsets on the attributes the rest of the class reads
        # and serializes (camelCase), not on stray snake_case attributes
        m.characterStartOffset = mjson["characterStartOffset"]
        m.characterEndOffset = mjson["characterEndOffset"]
        return m

    def _to_document_map(self):
        # single-entry lookup table in the shape `load_from_JSON` expects
        return {self._doc_id: self.document}

    def _set_type(self):
        # event mention: has a trigger
        if self.trigger is not None:
            return Mention.EM
        # textbound mention: neither trigger nor arguments
        if self.arguments is None:
            return Mention.TBM
        # relation mention: arguments without a trigger
        return Mention.RM

1		#!/usr/bin/env python
2		# -- coding: utf-8 --
3		from __future__ import unicode_literals
4		from .utils import post_json
5		from .ds import Document, Interval
6		import re
7		import json
8
9
10		class Mention(object):
11		"""
12		A labeled span of text. Used to model textual mentions of events, relations, and entities.
13
14		Parameters
15		----------
16		token_interval : Interval
17		The span of the Mention represented as an Interval.
18		sentence : int
19		The sentence index that contains the Mention.
20		document : Document
21		The Document in which the Mention was found.
22		foundBy : str
23		The Odin IE rule that produced this Mention.
24		label : str
25		The label most closely associated with this span. Usually the lowest hyponym of "labels".
26		labels: list
27		The list of labels associated with this span.
28		trigger: dict or None
29		dict of JSON for Mention's trigger (event predicate or word(s) signaling the Mention).
30		arguments: dict or None
31		dict of JSON for Mention's arguments.
32		paths: dict or None
33		dict of JSON encoding the syntactic paths linking a Mention's arguments to its trigger (applies to Mentions produced from `type:"dependency"` rules).
34		doc_id: str or None
35		the id of the document
36
37		Attributes
38		----------
39		tokenInterval: processors.ds.Interval
40		An `Interval` encoding the `start` and `end` of the `Mention`.
41		start : int
42		The token index that starts the `Mention`.
43		end : int
44		The token index that marks the end of the Mention (exclusive).
45		sentenceObj : processors.ds.Sentence
46		Pointer to the `Sentence` instance containing the `Mention`.
47		characterStartOffset: int
48		The index of the character that starts the `Mention`.
49		characterEndOffset: int
50		The index of the character that ends the `Mention`.
51		type: Mention.TBM or Mention.EM or Mention.RM
52		The type of the `Mention`.
53
54		See Also
55		--------
56
57		[`Odin` manual](https://arxiv.org/abs/1509.07513)
58
59		Methods
60		-------
61		matches(label_pattern)
62		Test if the provided pattern, `label_pattern`, matches any element in `Mention.labels`.
63
64		"""
65
66		TBM = "TextBoundMention"
67		EM = "EventMention"
68		RM = "RelationMention"
69
70		def __init__(self,
71		token_interval,
72		sentence,
73		document,
74		foundBy,
75		label,
76		labels=None,
77		trigger=None,
78		arguments=None,
79		paths=None,
80		keep=True,
81		doc_id=None):
82
83		self.label = label
84		self.labels = labels if labels else [self.label]
85		self.tokenInterval = token_interval
86		self.start = self.tokenInterval.start
87		self.end = self.tokenInterval.end
88		self.document = document
89	View Code Duplication	self._doc_id = doc_id or hash(self.document)
		0 ignored issues – show Duplication introduced 2016-06-19 06:22 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
90		self.sentence = sentence
91		if trigger:
92		# NOTE: doc id is not stored for trigger's json,
93		# as it is assumed to be contained in the same document as its parent
94		trigger.update({"document": self._doc_id})
95		self.trigger = Mention.load_from_JSON(trigger, self._to_document_map())
96		else:
97		self.trigger = None
98		# unpack args
99		self.arguments = {role:[Mention.load_from_JSON(a, self._to_document_map()) for a in args] for (role, args) in arguments.items()} if arguments else None
100		self.paths = paths
101		self.keep = keep
102		self.foundBy = foundBy
103		# other
104		self.sentenceObj = self.document.sentences[self.sentence]
105	View Code Duplication	self.text = " ".join(self.sentenceObj.words[self.start:self.end])
		0 ignored issues – show Duplication introduced 2016-06-19 06:22 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
106		# recover offsets
107		self.characterStartOffset = self.sentenceObj.startOffsets[self.tokenInterval.start]
108		self.characterEndOffset = self.sentenceObj.endOffsets[self.tokenInterval.end]
109		# for later recovery
110		self.id = None
111		self.type = self._set_type()
112
113		def __eq__(self, other):
114		if isinstance(other, self.__class__):
115		return self.__dict__ == other.__dict__
116		else:
117		return False
118
119		def __ne__(self, other):
120		return not self.__eq__(other)
121
122		def __str__(self):
123		return self.text
124
125		def to_JSON_dict(self):
126		m = dict()
127		m["id"] = self.id
128		m["type"] = self.type
129		m["label"] = self.label
130		m["labels"] = self.labels
131		m["tokenInterval"] = self.tokenInterval.to_JSON_dict()
132		m["characterStartOffset"] = self.characterStartOffset
133		m["characterEndOffset"] = self.characterEndOffset
134		m["sentence"] = self.sentence
135		m["document"] = self._doc_id
136		# do we have a trigger?
137		if self.trigger:
138		m["trigger"] = self.trigger.to_JSON_dict()
139		# do we have arguments?
140		if self.arguments:
141		m["arguments"] = self._arguments_to_JSON_dict()
142		# handle paths
143		if self.paths:
144		m["paths"] = self.paths
145		m["keep"] = self.keep
146		m["foundBy"] = self.foundBy
147		return m
148
149		def matches(self, label_pattern):
150		"""
151		Test if the provided pattern, `label_pattern`, matches any element in `Mention.labels`.
152
153		Parameters
154		----------
155		label_pattern : str or _sre.SRE_Pattern
156		The pattern to match against each element in `Mention.labels`
157
158		Returns
159		-------
160		bool
161		True if `label_pattern` matches any element in `Mention.labels`
162		"""
163		return any(label_pattern.match(label) for label in self.labels)
164
165		def to_JSON(self):
166		return json.dumps(self.to_JSON_dict(), sort_keys=True, indent=4)
167
168		def _arguments_to_JSON_dict(self):
169		return dict((role, [a.to_JSON_dict() for a in args]) for (role, args) in self.arguments.items())
170
171		def _paths_to_JSON_dict(self):
172		return {role: paths.to_JSON_dict() for (role, paths) in self.paths}
173
174		@staticmethod
175		def load_from_JSON(mjson, docs_dict):
176		# recover document
177		doc_id = mjson["document"]
178		doc = docs_dict[doc_id]
179		labels = mjson["labels"]
180		kwargs = {
181		"label": mjson.get("label", labels[0]),
182		"labels": labels,
183		"token_interval": Interval.load_from_JSON(mjson["tokenInterval"]),
184		"sentence": mjson["sentence"],
185		"document": doc,
186		"doc_id": doc_id,
187		"trigger": mjson.get("trigger", None),
188		"arguments": mjson.get("arguments", None),
189		"paths": mjson.get("paths", None),
190		"keep": mjson.get("keep", True),
191		"foundBy": mjson["foundBy"]
192		}
193		m = Mention(**kwargs)
194		# set IDs
195		m.id = mjson["id"]
196		m._doc_id = doc_id
197		# set character offsets
198		m.character_start_offset = mjson["characterStartOffset"]
199		m.character_end_offset = mjson["characterEndOffset"]
200		return m
201
202		def _to_document_map(self):
203		return {self._doc_id: self.document}
204
205		def _set_type(self):
206		# event mention
207		if self.trigger != None:
208		return Mention.EM
209		# textbound mention
210		elif self.trigger == None and self.arguments == None:
211		return Mention.TBM
212		else:
213		return Mention.RM
214

myedibleenso / py-processors

Push — master ( 352884...922d0d )

Mention.load_from_JSON() B

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like