processors.annotators - Code Metrics - clu-ling/py-processors - Measure and Improve Code Quality continuously with Scrutinizer

processors.annotators A
last analyzed 2024-01-21 06:33 UTC

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	202
Duplicated Lines	0 %

Importance

Changes

Metric	Value
wmc	22
eloc	68
dl	0
loc	202
rs	10
c	0
b	0
f	0

20 Methods

Rating	Name	Size	Complexity
A	Processor.annotate_from_sentences()	22	2
A	FastNLPProcessor.__init__()	4	1
A	Processor._annotate_message()	3	1
A	SegmentedMessage.to_JSON_dict()	4	1
A	SegmentedMessage.__init__()	2	1
A	CluProcessor.__init__()	2	1
A	Processor.annotate()	23	2
A	Message.__init__()	2	1
A	FastNLPProcessor.chunk_document()	3	1
A	SegmentedMessage.to_JSON()	2	1
A	Message.to_JSON()	2	1
A	FastNLPProcessor._chunk()	2	1
A	BioNLPProcessor.__init__()	2	1
A	Message.to_JSON_dict()	4	1
A	BioNLPProcessor.annotate()	2	1
A	CluProcessor.annotate()	2	1
A	Processor._message_to_json_dict()	2	1
A	FastNLPProcessor.annotate()	2	1
A	FastNLPProcessor.chunk_sentence()	3	1
A	Processor.__init__()	2	1

# -*- coding: utf-8 -*-

from __future__ import unicode_literals

import json
import logging

# use data structures
from processors.ds import Document, Sentence, DirectedGraph
from processors.utils import post_json


class Processor(object):
    """
    Base Processor for text annotation (tokenization, sentence splitting,
    parsing, lemmatization, PoS tagging, named entity recognition, chunking, etc.).

    Parameters
    ----------
    address : str
        The base address for the API (i.e., everything preceding `/api/..`)


    Attributes
    ----------
    service : str
        The API endpoint for `annotate` requests.

    Methods
    -------
    annotate(text)
        Produces an annotated `Document` from the provided text.
    annotate_from_sentences(sentences)
        Produces an annotated `Document` from a [str] of text already split into sentences.

    """
    def __init__(self, address):
        self.service = "{}/api/annotate".format(address)

    def _message_to_json_dict(self, msg):
        """
        POST the JSON form of `msg` to `self.service`.

        Parameters
        ----------
        msg : object
            Any object exposing `to_JSON()` (e.g. `Message`, `SegmentedMessage`).

        Returns
        -------
        object
            Whatever `post_json` returns for the request
            (presumably the decoded JSON response -- confirm in `processors.utils`).
        """
        return post_json(self.service, msg.to_JSON())

    def _annotate_message(self, msg):
        """
        Send `msg` to the `annotate` endpoint and build a `Document` from the response.

        Parameters
        ----------
        msg : object
            Any object exposing `to_JSON()`.

        Returns
        -------
        processors.ds.Document
            The result of `Document.load_from_JSON` applied to the service response.
        """
        # Go through the single request helper rather than repeating the POST here.
        annotated_text = self._message_to_json_dict(msg)
        return Document.load_from_JSON(annotated_text)

    def _log_failure(self, action):
        """
        Record the exception currently being handled as a warning (with traceback).

        Must be called from inside an `except` block so that `exc_info=True`
        picks up the active exception.

        Parameters
        ----------
        action : str
            Name of the public method that failed; used in the log message.
        """
        logging.getLogger(__name__).warning(
            "%s failed for service %s", action, self.service, exc_info=True
        )

    def annotate(self, text):
        """
        Annotate text (tokenization, sentence splitting,
        parsing, lemmatization, PoS tagging, named entity recognition, chunking, etc.)

        Any exception raised while building the request, contacting the
        service, or loading the response is logged as a warning and turned
        into a `None` return value.

        Parameters
        ----------
        text : str
            `text` to be annotated.

        Returns
        -------
        processors.ds.Document or None
            An annotated Document composed of `sentences`,
            or None if annotation failed.
        """
        try:
            # load json and build Sentences and Document
            msg = Message(text)
            return self._annotate_message(msg)

        except Exception:
            # Best-effort contract: callers get None, but the cause is no longer lost.
            self._log_failure("annotate")
            return None

    def annotate_from_sentences(self, sentences):
        """
        Annotate text that has already been segmented into `sentences`.

        Any exception raised while building the request, contacting the
        service, or loading the response is logged as a warning and turned
        into a `None` return value.

        Parameters
        ----------
        sentences : [str]
            A list of str representing text already split into sentences.

        Returns
        -------
        processors.ds.Document or None
            An annotated `Document` composed of `sentences`,
            or None if annotation failed.
        """
        try:
            # load json from str iterable and build Sentences and Document
            msg = SegmentedMessage(sentences)
            return self._annotate_message(msg)

        except Exception:
            # Best-effort contract: callers get None, but the cause is no longer lost.
            self._log_failure("annotate_from_sentences")
            return None

class CluProcessor(Processor):

    """
    Processor for text annotation based on [`org.clulab.processors.clu.CluProcessor`](https://github.com/clulab/processors/blob/master/main/src/main/scala/org/clulab/processors/clu/CluProcessor.scala)

    Uses the Malt parser.

    Parameters
    ----------
    address : str
        The base address for the API; `/api/clu/annotate` is appended to it
        to form `service`.
    """
    def __init__(self, address):
        # Point `service` at the clu-specific annotate endpoint.
        clu_endpoint = "{}/api/clu/annotate".format(address)
        self.service = clu_endpoint

    def annotate(self, text):
        """
        Annotate `text` by delegating to the parent class's `annotate`.
        """
        parent = super(CluProcessor, self)
        return parent.annotate(text)


class FastNLPProcessor(Processor):

    """
    Processor for text annotation based on [`org.clulab.processors.fastnlp.FastNLPProcessor`](https://github.com/clulab/processors/blob/master/corenlp/src/main/scala/org/clulab/processors/fastnlp/FastNLPProcessor.scala)

    Uses the Stanford CoreNLP neural network parser.

    Attributes
    ----------
    address : str
        The base address for the API.
    service : str
        The API endpoint for `annotate` requests (`<address>/api/fastnlp/annotate`).
    chunk_address : str
        The API endpoint for chunking requests (`<address>/api/fastnlp/chunk`).
    """
    def __init__(self, address):
        base = address
        self.address = base
        # Both endpoints hang off the same base address.
        self.service = "{}/api/fastnlp/annotate".format(base)
        self.chunk_address = "{}/api/fastnlp/chunk".format(base)

    def annotate(self, text):
        """
        Annotate `text` by delegating to the parent class's `annotate`.
        """
        parent = super(FastNLPProcessor, self)
        return parent.annotate(text)

    def _chunk(self, obj):
        """
        POST the JSON form of `obj` (anything exposing `to_JSON()`) to `chunk_address`
        and return whatever `post_json` returns.
        """
        payload = obj.to_JSON()
        return post_json(self.chunk_address, payload)

    def chunk_sentence(self, sentence):
        """
        Send `sentence` to the chunk endpoint and load a `Sentence` from the response.
        """
        return Sentence.load_from_JSON(self._chunk(sentence))

    def chunk_document(self, doc):
        """
        Send `doc` to the chunk endpoint and load a `Document` from the response.
        """
        return Document.load_from_JSON(self._chunk(doc))


class BioNLPProcessor(Processor):

    """
    Processor for biomedical text annotation based on [`org.clulab.processors.fastnlp.FastNLPProcessor`](https://github.com/clulab/processors/blob/master/corenlp/src/main/scala/org/clulab/processors/fastnlp/FastNLPProcessor.scala)

    CoreNLP-derived annotator.

    NOTE(review): the link above points at FastNLPProcessor; confirm whether
    the BioNLPProcessor source was the intended reference.

    Parameters
    ----------
    address : str
        The base address for the API; `/api/bionlp/annotate` is appended to it
        to form `service`.
    """

    def __init__(self, address):
        # Point `service` at the bionlp-specific annotate endpoint.
        bionlp_endpoint = "{}/api/bionlp/annotate".format(address)
        self.service = bionlp_endpoint

    def annotate(self, text):
        """
        Annotate `text` by delegating to the parent class's `annotate`.
        """
        parent = super(BioNLPProcessor, self)
        return parent.annotate(text)


class Message(object):

    """
    Request payload wrapping raw `text` for the API `annotate` endpoint.

    Attributes
    ----------
    text : str
        The `text` to be annotated.

    Methods
    -------
    to_JSON_dict()
        Produces a dict with the single key `"text"`.
    to_JSON()
        Produces a json str in the structure expected by the API `annotate` endpoint.

    """
    def __init__(self, text):
        self.text = text

    def to_JSON_dict(self):
        """
        Build a fresh dict holding the text under the `"text"` key.
        """
        return {"text": self.text}

    def to_JSON(self):
        """
        Serialize `to_JSON_dict()` as a json str (sorted keys, 4-space indent).
        """
        payload = self.to_JSON_dict()
        return json.dumps(payload, indent=4, sort_keys=True)


class SegmentedMessage(object):
    """
    Request payload wrapping pre-split sentences for the API `annotate` endpoint.

    Attributes
    ----------
    segments : [str]
        Text to be annotated that has already been split into sentences.  This segmentation is preserved during annotation.

    Methods
    -------
    to_JSON_dict()
        Produces a dict with the single key `"segments"`.
    to_JSON()
        Produces a json str in the structure expected by the API `annotate` endpoint.

    """
    def __init__(self, segments):
        self.segments = segments

    def to_JSON_dict(self):
        """
        Build a fresh dict holding the segments (the same object, not a copy)
        under the `"segments"` key.
        """
        return {"segments": self.segments}

    def to_JSON(self):
        """
        Serialize `to_JSON_dict()` as a json str (sorted keys, 4-space indent).
        """
        payload = self.to_JSON_dict()
        return json.dumps(payload, indent=4, sort_keys=True)


1			# -- coding: utf-8 --
2
3			# use data structures
4			from __future__ import unicode_literals
5			from processors.ds import Document, Sentence, DirectedGraph
6			from processors.utils import post_json
7			import json
8
9
10			class Processor(object):
11			"""
12			Base Processor for text annotation (tokenization, sentence splitting,
13			parsing, lemmatization, PoS tagging, named entity recognition, chunking, etc.).
14
15			Parameters
16			----------
17			address : str
18			The base address for the API (i.e., everything preceding `/api/..`)
19
20
21			Attributes
22			----------
23			service : str
24			The API endpoint for `annotate` requests.
25
26			Methods
27			-------
28			annotate(text)
29			Produces an annotated `Document` from the provided text.
30			annotate_from_sentences(sentences)
31			Produces an annotated `Document` from a [str] of text already split into sentences.
32
33			"""
34			def __init__(self, address):
35			self.service = "{}/api/annotate".format(address)
36
37			def _message_to_json_dict(self, msg):
38			return post_json(self.service, msg.to_JSON())
39
40			def _annotate_message(self, msg):
41			annotated_text = post_json(self.service, msg.to_JSON())
42			return Document.load_from_JSON(annotated_text)
43
44			def annotate(self, text):
45			"""
46			Annotate text (tokenization, sentence splitting,
47			parsing, lemmatization, PoS tagging, named entity recognition, chunking, etc.)
48
49			Parameters
50			----------
51			text : str
52			`text` to be annotated.
53
54			Returns
55			-------
56			processors.ds.Document or None
57			An annotated Document composed of `sentences`.
58			"""
59			try:
60			# load json and build Sentences and Document
61			msg = Message(text)
62			return self._annotate_message(msg)
63
64			except Exception as e:
65			#print(e)
66			return None
67
68			def annotate_from_sentences(self, sentences):
69			"""
70			Annotate text that has already been segmented into `sentences`.
71
72			Parameters
73			----------
74			sentences : [str]
75			A list of str representing text already split into sentences.
76
77			Returns
78			-------
79			processors.ds.Document or None
80			An annotated `Document` composed of `sentences`.
81			"""
82			try:
83			# load json from str iterable and build Sentences and Document
84			msg = SegmentedMessage(sentences)
85			return self._annotate_message(msg)
86
87			except Exception as e:
88			#print(e)
89			return None
90
91			class CluProcessor(Processor):
92
93			"""
94			Processor for text annotation based on [`org.clulab.processors.clu.CluProcessor`](https://github.com/clulab/processors/blob/master/main/src/main/scala/org/clulab/processors/clu/CluProcessor.scala)
95
96			Uses the Malt parser.
97			"""
98			def __init__(self, address):
99			self.service = "{}/api/clu/annotate".format(address)
100
101			def annotate(self, text):
102			return super(CluProcessor, self).annotate(text)
103
104
105			class FastNLPProcessor(Processor):
106
107			"""
108			Processor for text annotation based on [`org.clulab.processors.fastnlp.FastNLPProcessor`](https://github.com/clulab/processors/blob/master/corenlp/src/main/scala/org/clulab/processors/fastnlp/FastNLPProcessor.scala)
109
110			Uses the Stanford CoreNLP neural network parser.
111			"""
112			def __init__(self, address):
113			self.address = address
114			self.service = "{}/api/fastnlp/annotate".format(address)
115			self.chunk_address = "{}/api/fastnlp/chunk".format(self.address)
116
117
118			def annotate(self, text):
119			return super(FastNLPProcessor, self).annotate(text)
120
121			def _chunk(self, obj):
122			return post_json(self.chunk_address, obj.to_JSON())
123
124			def chunk_sentence(self, sentence):
125			res = self._chunk(sentence)
126			return Sentence.load_from_JSON(res)
127
128			def chunk_document(self, doc):
129			res = self._chunk(doc)
130			return Document.load_from_JSON(res)
131
132
133			class BioNLPProcessor(Processor):
134
135			"""
136			Processor for biomedical text annotation based on [`org.clulab.processors.fastnlp.FastNLPProcessor`](https://github.com/clulab/processors/blob/master/corenlp/src/main/scala/org/clulab/processors/fastnlp/FastNLPProcessor.scala)
137
138			CoreNLP-derived annotator.
139
140			"""
141
142			def __init__(self, address):
143			self.service = "{}/api/bionlp/annotate".format(address)
144
145			def annotate(self, text):
146			return super(BioNLPProcessor, self).annotate(text)
147
148
149			class Message(object):
150
151			"""
152			A storage class for passing `text` to API `annotate` endpoint.
153
154			Attributes
155			----------
156			text : str
157			The `text` to be annotated.
158
159			Methods
160			-------
161			to_JSON()
162			Produces a json str in the structure expected by the API `annotate` endpoint.
163
164			"""
165			def __init__(self, text):
166			self.text = text
167
168			def to_JSON_dict(self):
169			jdict = dict()
170			jdict["text"] = self.text
171			return jdict
172
173			def to_JSON(self):
174			return json.dumps(self.to_JSON_dict(), sort_keys=True, indent=4)
175
176
177			class SegmentedMessage(object):
178			"""
179			A storage class for passing text already split into sentences to API `annotate` endpoint.
180
181			Attributes
182			----------
183			segments : [str]
184			Text to be annotated that has already been split into sentences. This segmentation is preserved during annotation.
185
186			Methods
187			-------
188			to_JSON()
189			Produces a json str in the structure expected by the API `annotate` endpoint.
190
191			"""
192			def __init__(self, segments):
193			self.segments = segments
194
195			def to_JSON_dict(self):
196			jdict = dict()
197			jdict["segments"] = self.segments
198			return jdict
199
200			def to_JSON(self):
201			return json.dumps(self.to_JSON_dict(), sort_keys=True, indent=4)
202

clu-ling / py-processors

processors.annotators A last analyzed 2024-01-21 06:33 UTC

Complexity

Size/Duplication

Importance

20 Methods

Duplication Side-by-Side

Filter issues like

processors.annotators A
last analyzed 2024-01-21 06:33 UTC