Completed
Push — master ( 36b08f...64c530 )
by Gus
01:16
created

ProcessorsAPI._resolve_jar_path()   C

Complexity

Conditions 7

Size

Total Lines 26

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 7
c 1
b 0
f 0
dl 0
loc 26
rs 5.5
1
#!/usr/bin/env python
2
# -*- coding: utf-8 -*-
3
from __future__ import unicode_literals
4
#from pkg_resources import resource_filename
5
from six.moves.urllib.request import urlretrieve
6
from .utils import *
7
from .processors import *
8
from .sentiment import SentimentAnalysisAPI
9
from .odin import OdinAPI
10
import os
11
import shlex
12
import os
13
import subprocess as sp
14
import requests
15
import time
16
import sys
17
import logging
18
import warnings
19
20
21
class ProcessorsAPI(object):
22
23
    """
24
    Manages a connection with the processors-server jar and provides an interface to the API.
25
26
    Parameters
27
    ----------
28
    port : int
29
        The port the server is running on or should be started on.  Default is 8886.
30
    hostname : str
31
        The host name to use for the server.  Default is "localhost".
32
    timeout : int
33
        The number of seconds to wait for the server to initialize.  Default is 120.
34
    jvm_mem : str
35
        The maximum amount of memory to allocate to the JVM for the server.  Default is "-Xmx3G".
36
    jar_path : str
37
        The path to the processors-server jar.  Default is the jar installed with the package.
38
    keep_alive : bool
39
        Whether or not to keep the server running when ProcessorsAPI instance goes out of scope.  Default is false (server is shut down).
40
    log_file: str
41
        The path for the log file.  Default is .py-processors.log in the user's home directory.
42
43
    Methods
44
    -------
45
    annotate(text)
46
        Produces a Document from the provided `text` using the default processor.
47
    fastnlp.annotate(text)
48
        Produces a Document from the provided `text` using FastNLPProcessor.
49
    bionlp.annotate(text)
50
        Produces a Document from the provided `text` using BioNLPProcessor.
51
    annotate_from_sentences(sentences)
52
        Produces a Document from `sentences` (a list of text split into sentences). Uses the default processor.
53
    fastnlp.annotate_from_sentences(sentences)
54
        Produces a Document from `sentences` (a list of text split into sentences). Uses FastNLPProcessor.
55
    bionlp.annotate_from_sentences(sentences)
56
        Produces a Document from `sentences` (a list of text split into sentences). Uses BioNLPProcessor.
57
    corenlp.sentiment.score_sentence(sentence)
58
        Produces a sentiment score for the provided `sentence` (an instance of Sentence).
59
    corenlp.sentiment.score_document(doc)
60
        Produces sentiment scores for the provided `doc` (an instance of Document).  One score is produced for each sentence.
61
    corenlp.sentiment.score_segmented_text(sentences)
62
        Produces sentiment scores for the provided `sentences` (a list of text segmented into sentences).  One score is produced for each item in `sentences`.
63
    odin.extract_from_text(text, rules)
64
        Produces a list of Mentions for matches of the provided `rules` on the `text`.  `rules` can be a string of Odin rules, or a url ending in .yml or .yaml.
65
    odin.extract_from_document(doc, rules)
66
        Produces a list of Mentions for matches of the provided `rules` on the `doc` (an instance of Document).  `rules` can be a string of Odin rules, or a url ending in .yml or .yaml.
67
    start_server(jar_path, **kwargs)
68
        Starts the server using the provided `jar_path`.  Optionally takes hostname, port, jvm_mem, and timeout.
69
    stop_server()
70
        Attempts to stop the server running at self.address.
71
    """
72
73
    PROC_VAR = 'PROCESSORS_SERVER'
74
    TIMEOUT = 120
75
    # save to lib loc
76
    DEFAULT_JAR = os.path.join(os.path.dirname(os.path.realpath(__file__)), "processors-server.jar")
77
    PORT = 8886
78
    JVM_MEM = "-Xmx3G"
79
    HOST = "localhost"
80
    LOG = full_path(os.path.join("~", ".py-processors.log"))
81
    #print(resource_filename(__name__, "processors-server.jar"))
82
83
    def __init__(self, **kwargs):
        """
        Configure the client from keyword options and try to connect.

        Recognized keys are ``hostname``, ``port``, ``timeout``, ``jvm_mem``,
        ``keep_alive``, ``log_file`` and ``jar_path``.  Any key that is not
        supplied falls back to the matching class-level constant
        (``keep_alive`` falls back to False).  The last step calls
        establish_connection().
        """
        cls = ProcessorsAPI
        option = kwargs.get

        # make_address() stores hostname and port and derives self.address
        self.make_address(option("hostname", cls.HOST), option("port", cls.PORT))
        self.timeout = option("timeout", cls.TIMEOUT)
        self.jvm_mem = option("jvm_mem", cls.JVM_MEM)
        # placeholders: mem, jar path, port, host
        self._start_command = "java {mem} -cp {jp} NLPServer --port {port} --host {host}"
        # when False, __del__ attempts to stop the server
        self.keep_alive = option("keep_alive", False)
        # seconds to sleep between polling requests while the server starts
        self.wait_time = 2

        # every service wrapper is bound to the same server address
        server = self.address
        self.default = Processor(server)
        self.fastnlp = FastNLPProcessor(server)
        self.bionlp = BioNLPProcessor(server)
        self.sentiment = SentimentAnalysisAPI(server)
        self.odin = OdinAPI(server)

        # the logger must exist before _prepare_log_file() attaches handlers
        self.logger = logging.getLogger(__name__)
        requested_log = option("log_file", cls.LOG)
        self.log_file = self._prepare_log_file(requested_log)
        self.jar_path = option("jar_path", cls.DEFAULT_JAR)

        # connect to a running server, or try to start one
        self.establish_connection()
110
111
    def _check_server_version(self):
        """
        Checks server version to see if it meets the recommendations.

        Posts to ``<address>/version`` and compares the reported version with
        the recommended one (``__ps_rec__``), both converted to float.  A
        mismatch, or any failure to obtain/parse the version, results in a
        warning; a match is logged at INFO level.  Nothing is returned.
        """
        # avoid circular imports by delaying this import
        from .__init__ import __ps_rec__
        try:
            service_address = "{}/version".format(self.address)
            server_version = post_json(service_address, None)["version"]
            meets_recommendation = float(__ps_rec__) == float(server_version)
        # `Exception` rather than a bare except, so KeyboardInterrupt and
        # SystemExit are no longer swallowed
        except Exception:
            warnings.warn("Unable to determine server version.  Recommended version is {}".format(__ps_rec__))
            return

        if meets_recommendation:
            self.logger.info("Server version meets recommendations (v{})".format(__ps_rec__))
        else:
            warnings.warn("Recommended server version is {}, but server version is {}".format(__ps_rec__, server_version))
126
127
128
    def _prepare_log_file(self, lf):
129
        """
130
        Configure logger and return file path for logging
131
        """
132
        # log_file
133
        log_file = ProcessorsAPI.LOG if not lf else os.path.expanduser(lf)
134
        # configure logger
135
        self.logger.setLevel(logging.DEBUG)
136
        # create console handler and set level to info
137
        handler = logging.StreamHandler()
138
        handler.setLevel(logging.INFO)
139
        formatter = logging.Formatter("%(levelname)s - %(message)s")
140
        handler.setFormatter(formatter)
141
        self.logger.addHandler(handler)
142
        # create debug file handler and set level to debug
143
        handler = logging.FileHandler(log_file, "w")
144
        handler.setLevel(logging.DEBUG)
145
        formatter = logging.Formatter("%(levelname)s - %(message)s")
146
        handler.setFormatter(formatter)
147
        self.logger.addHandler(handler)
148
        return log_file
149
150
    def annotate(self, text):
151
        """
152
        Uses default processor (CoreNLP) to annotate text.  Included for backwards compatibility.
153
        """
154
        return self.default.annotate(text)
155
156
    def annotate_from_sentences(self, sentences):
157
        """
158
        Uses default processor (CoreNLP) to annotate a list of segmented sentences.
159
        """
160
        return self.default.annotate_from_sentences(sentences)
161
162
    def is_running(self):
163
        return True if self.annotate("Blah") else False
164
165
    def establish_connection(self):
166
        """
167
        Attempt to connect to a server (assumes server is running)
168
        """
169
        if self.is_running():
170
            self.logger.info("Connection with server established!")
171
            self._check_server_version()
172
        else:
173
            try:
174
                # resolve jar path if server is not already running
175
                self._resolve_jar_path(self.jar_path)
176
                # Attempt to start the server
177
                self._start_server()
178
            except Exception as e:
179
                self.logger.warn("Unable to start server. Please start the server manually with .start_server(jar_path=\"path/to/processors-server.jar\")")
180
                self.logger.warn("\n{}".format(e))
181
182
    def _resolve_jar_path(self, jar_path=None):
        """
        Attempts to preferentially set value of self.jar_path.

        Candidates are tried in order and the first existing file wins:

        1. the `jar_path` argument;
        2. the path stored in the $PROCESSORS_SERVER environment variable;
        3. ProcessorsAPI.DEFAULT_JAR, which is downloaded if it is missing.

        Parameters
        ----------
        jar_path : str or None
            Preferred location of processors-server.jar.

        Notes
        -----
        Previously the environment variable was consulted only when `jar_path`
        was falsy (an `elif`), so an invalid `jar_path` skipped it entirely.
        An existing default jar was also downloaded again whenever the
        preferred path was invalid.
        """
        # Preference 1: if a .jar is given, check to see if the path is valid
        if jar_path:
            candidate = full_path(jar_path)
            if os.path.exists(candidate):
                self.jar_path = candidate
                return

        # Preference 2: if a PROCESSORS_SERVER environment variable is defined, check its validity
        if ProcessorsAPI.PROC_VAR in os.environ:
            candidate = full_path(os.environ[ProcessorsAPI.PROC_VAR])
            if os.path.exists(candidate):
                self.logger.info("Using path given via $PROCESSORS_SERVER")
                self.jar_path = candidate
                return
            self.logger.warning("WARNING: {0} path is invalid.  \nPlease verify this entry in your environment:\n\texport {0}=/path/to/processors-server.jar".format(ProcessorsAPI.PROC_VAR))

        # Preference 3: attempt to use the default processors-server.jar (download if not found)
        if not os.path.exists(ProcessorsAPI.DEFAULT_JAR):
            self.logger.info("No jar found.  Downloading to {} ...".format(ProcessorsAPI.DEFAULT_JAR))
            ProcessorsAPI._download_jar()
        self.jar_path = ProcessorsAPI.DEFAULT_JAR
208
209
    def start_server(self, jar_path, **kwargs):
        """
        Starts processors-server.jar

        Parameters
        ----------
        jar_path : str
            Path to the processors-server jar.
        **kwargs
            Optional ``port``, ``hostname``, ``jvm_mem`` and ``timeout``
            overrides; each defaults to the value currently stored on the
            instance.

        Raises
        ------
        Exception
            If no usable jar path is provided, or if _start_server() fails.

        Notes
        -----
        The timeout default used to be ``self.jvm_mem`` (e.g. "-Xmx3G"), which
        made ``float()`` raise ValueError whenever ``timeout`` was not passed.
        The value is now stored in seconds, exactly as __init__ stores it;
        _start_server() already divides it by the polling interval.
        """
        port = kwargs.get("port", self.port)
        hostname = kwargs.get("hostname", self.hostname)
        # keep self.hostname, self.port and self.address in sync
        # NOTE(review): the processor/sentiment/odin wrappers were built with
        # the address known at construction time and are not rebuilt here --
        # confirm whether they need to follow a changed host or port.
        self.make_address(hostname, port)
        self.jvm_mem = kwargs.get("jvm_mem", self.jvm_mem)
        self.timeout = int(float(kwargs.get("timeout", self.timeout)))
        jp = full_path(jar_path)
        if jp:
            self.jar_path = jp
            self._start_server()
        else:
            raise Exception("Please provide jar_path=\"path/to/processors-server.jar\"")
223
224
    def stop_server(self, port=None):
        """
        Sends a poison pill to the server and waits for shutdown response.

        Parameters
        ----------
        port : int, optional
            Port of the server to stop.  Defaults to self.port.

        Returns
        -------
        bool
            True if the shutdown request completed, False if it raised
            (for example because the server is already down).
        """
        port = port or self.port
        address = "http://{}:{}".format(self.hostname, port)
        shutdown_address = "{}/shutdown".format(address)
        # attempt shutdown
        try:
            response = requests.post(shutdown_address)
            # a requests Response is truthy only for non-error status codes
            if response:
                print(response.content.decode("utf-8"))
            return True
        # will fail if the server is already down
        except Exception as e:
            # still best-effort, but record the reason instead of discarding it
            logger = getattr(self, "logger", None)
            if logger is not None:
                logger.debug("Shutdown request to {} failed: {}".format(shutdown_address, e))
        return False
241
242
    def _ensure_jar_path_exists(self):
243
        # check if jar exists
244
        if not os.path.exists(self.jar_path):
245
            raise Exception("jar not found at {}".format(self.jar_path))
246
247
    def _start_server(self, port=None):
        """
        "Private" method called by start_server()

        Launches the server jar in a child process and polls it (one
        annotation request every self.wait_time seconds) until it answers or
        the timeout is used up.

        Parameters
        ----------
        port : int, optional
            If given, replaces self.port before the command is built.

        Returns
        -------
        bool
            True once a polling request succeeds.

        Raises
        ------
        Exception
            If the jar is missing, the child process exits before answering,
            or no request succeeds within the timeout.
        """
        # does the jar exist?
        self._ensure_jar_path_exists()

        if port:
            self.port = port
        # build the command
        # NOTE(review): the command is split with shlex, so a jar path that
        # contains whitespace would be broken into several arguments.
        cmd = self._start_command.format(mem=self.jvm_mem, jp=self.jar_path, port=self.port, host=self.hostname)

        # A single handle serves both streams.  Opening the file twice in
        # "wb" gave two descriptors that each wrote from offset 0 and
        # overwrote one another, and neither was ever closed.  Append mode
        # keeps what the logger's own file handler has already written.  The
        # child inherits the descriptor, so the parent copy can be closed.
        with open(self.log_file, "ab") as server_log:
            self._process = sp.Popen(shlex.split(cmd),
                                     shell=False,
                                     stdout=server_log,
                                     stderr=sp.STDOUT,
                                     universal_newlines=True)

        self.logger.info("Starting processors-server ({}) ...".format(cmd))
        print("\nWaiting for server...")

        progressbar_length = int(self.timeout/self.wait_time)
        for i in range(progressbar_length):
            if self.annotate("blah"):
                print("\n\nConnection with processors-server established ({})".format(self.address))
                return True
            # stop waiting as soon as the child process has died
            if self._process.poll() is not None:
                raise Exception("processors-server exited unexpectedly. Is the port in use? See {} for details.".format(self.log_file))
            sys.stdout.write("\r[{:{}}]".format('='*i, progressbar_length))
            # without a flush the bar may not appear until the loop ends
            sys.stdout.flush()
            time.sleep(self.wait_time)

        # if the server still hasn't started, raise an Exception
        raise Exception("Couldn't connect to processors-server. Is the port in use?")
282
283
    def make_address(self, hostname, port):
284
        # update hostname
285
        self.hostname = hostname
286
        # update port
287
        self.port = port
288
        # update address
289
        self.address = "http://{}:{}".format(self.hostname, self.port)
290
291
    @staticmethod
    def _download_jar(jar_url="http://www.cs.arizona.edu/~hahnpowell/processors-server/current/processors-server.jar"):
        """
        Download processors-server.jar to ProcessorsAPI.DEFAULT_JAR.

        Parameters
        ----------
        jar_url : str
            Location of the jar to fetch.

        Notes
        -----
        The data is first written to ``<DEFAULT_JAR>.part`` and only moved to
        its final name after the transfer finishes.  An interrupted download
        therefore never leaves a truncated jar that later runs would accept as
        valid.

        NOTE(review): the default URL is plain http and the download is not
        verified against a checksum.
        """
        ppjar = ProcessorsAPI.DEFAULT_JAR
        partial = ppjar + ".part"

        def dlProgress(count, blockSize, totalSize):
            received = count * blockSize
            if totalSize > 0:
                # the last block is usually short, so cap the figure at 100
                percent = min(100, int(received * 100 / totalSize))
                sys.stdout.write("\r{}% complete".format(percent))
            else:
                # size unknown (urlretrieve reports -1, some servers 0):
                # avoid dividing by it
                sys.stdout.write("\r{} bytes received".format(received))
            sys.stdout.flush()

        print("Downloading {} from {} ...".format(ppjar, jar_url))
        finished = False
        try:
            urlretrieve(jar_url, partial, reporthook=dlProgress)
            finished = True
        finally:
            # also runs on KeyboardInterrupt
            if not finished and os.path.exists(partial):
                os.remove(partial)
        # os.rename will not overwrite an existing file on every platform
        if os.path.exists(ppjar):
            os.remove(ppjar)
        os.rename(partial, ppjar)
        print("\nDownload Complete! {}".format(ppjar))
304
305
306
    def _get_path(self, p):
307
        """
308
        Expand a user-specified path.  Supports "~" shortcut.
309
        """
310
        return os.path.abspath(os.path.normpath(os.path.expanduser(p)))
311
312
    def __del__(self):
313
        """
314
        Stop server unless otherwise specified
315
        """
316
        if not self.keep_alive:
317
            try:
318
                self.stop_server()
319
                # close our file object
320
                #self.DEVNULL.close()
321
                print("Successfully shut down processors-server!")
322
            except Exception as e:
323
                self.logger.debug(e)
324
                print("Couldn't kill processors-server.  Was server started externally?")
325