Completed
Push — master ( c4019d...466f9c )
by Gus
01:16
created

ProcessorsAPI._check_server_version()   A

Complexity

Conditions 3

Size

Total Lines 15

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 3
c 1
b 0
f 0
dl 0
loc 15
rs 9.4285
1
#!/usr/bin/env python
2
# -*- coding: utf-8 -*-
3
from __future__ import unicode_literals
4
#from pkg_resources import resource_filename
5
from six.moves.urllib.request import urlretrieve
6
from .utils import *
7
from .processors import *
8
from .sentiment import SentimentAnalysisAPI
9
from .odin import OdinAPI
10
import os
11
import shlex
12
import os
13
import subprocess as sp
14
import requests
15
import time
16
import sys
17
import logging
18
import warnings
19
20
21
class ProcessorsAPI(object):
22
23
    """
24
    Manages a connection with the processors-server jar and provides an interface to the API.
25
26
    Parameters
27
    ----------
28
    port : int
29
        The port the server is running on or should be started on.  Default is 8886.
30
    hostname : str
31
        The host name to use for the server.  Default is "localhost".
32
    timeout : int
33
        The number of seconds to wait for the server to initialize.  Default is 120.
34
    jvm_mem : str
35
        The maximum amount of memory to allocate to the JVM for the server.  Default is "-Xmx3G".
36
    jar_path : str
37
        The path to the processors-server jar.  Default is the jar installed with the package.
38
    keep_alive : bool
39
        Whether or not to keep the server running when ProcessorsAPI instance goes out of scope.  Default is false (server is shut down).
40
    log_file: str
41
        The path for the log file.  Default is .py-processors.log in the user's home directory.
42
43
    Methods
44
    -------
45
    annotate(text)
46
        Produces a Document from the provided `text` using the default processor.
47
    fastnlp.annotate(text)
48
        Produces a Document from the provided `text` using FastNLPProcessor.
49
    bionlp.annotate(text)
50
        Produces a Document from the provided `text` using BioNLPProcessor.
51
    annotate_from_sentences(sentences)
52
        Produces a Document from `sentences` (a list of text split into sentences). Uses the default processor.
53
    fastnlp.annotate_from_sentences(sentences)
54
        Produces a Document from `sentences` (a list of text split into sentences). Uses FastNLPProcessor.
55
    bionlp.annotate_from_sentences(sentences)
56
        Produces a Document from `sentences` (a list of text split into sentences). Uses BioNLPProcessor.
57
    corenlp.sentiment.score_sentence(sentence)
58
        Produces a sentiment score for the provided `sentence` (an instance of Sentence).
59
    corenlp.sentiment.score_document(doc)
60
        Produces sentiment scores for the provided `doc` (an instance of Document).  One score is produced for each sentence.
61
    corenlp.sentiment.score_segmented_text(sentences)
62
        Produces sentiment scores for the provided `sentences` (a list of text segmented into sentences).  One score is produced for each item in `sentences`.
63
    odin.extract_from_text(text, rules)
64
        Produces a list of Mentions for matches of the provided `rules` on the `text`.  `rules` can be a string of Odin rules, or a url ending in .yml or .yaml.
65
    odin.extract_from_document(doc, rules)
66
        Produces a list of Mentions for matches of the provided `rules` on the `doc` (an instance of Document).  `rules` can be a string of Odin rules, or a url ending in .yml or .yaml.
67
    start_server(jar_path, **kwargs)
68
        Starts the server using the provided `jar_path`.  Optionally takes hostname, port, jvm_mem, and timeout.
69
    stop_server()
70
        Attempts to stop the server running at self.address.
71
    """
72
73
    # name of the environment variable that may point at a processors-server jar
    PROC_VAR = 'PROCESSORS_SERVER'
    # default connection settings
    HOST = "localhost"
    PORT = 8886
    # default number of seconds to wait for the server to initialize
    TIMEOUT = 120
    # default memory flag passed to the JVM
    JVM_MEM = "-Xmx3G"
    # the default jar is expected in the same directory as this module
    DEFAULT_JAR = os.path.join(os.path.dirname(os.path.realpath(__file__)), "processors-server.jar")
    # default log file: .py-processors.log under "~"
    LOG = full_path(os.path.join("~", ".py-processors.log"))
82
83
    def __init__(self, **kwargs):
        """
        Configure the client from keyword options, then try to reach the server.

        Recognized options are hostname, port, timeout, jvm_mem, keep_alive,
        log_file and jar_path.  Any option that is omitted falls back to the
        matching class-level default (keep_alive falls back to False).
        """
        defaults = ProcessorsAPI

        # make_address() stores hostname and port and derives self.address
        self.make_address(
            kwargs.get("hostname", defaults.HOST),
            kwargs.get("port", defaults.PORT),
        )
        self.timeout = kwargs.get("timeout", defaults.TIMEOUT)
        self.jvm_mem = kwargs.get("jvm_mem", defaults.JVM_MEM)
        # template fields: mem, jar path, port, host
        self._start_command = "java {mem} -cp {jp} NLPServer --port {port} --host {host}"
        # when False, the server is stopped once this object is destroyed
        self.keep_alive = kwargs.get("keep_alive", False)
        # seconds to pause between successive requests while waiting for the server
        self.wait_time = 2

        # one client object per service, all bound to the same address
        server_address = self.address
        self.default = Processor(server_address)
        self.fastnlp = FastNLPProcessor(server_address)
        self.bionlp = BioNLPProcessor(server_address)
        self.sentiment = SentimentAnalysisAPI(server_address)
        self.odin = OdinAPI(server_address)

        # logging must be ready before the jar is resolved and the server contacted
        self.logger = logging.getLogger(__name__)
        requested_log = kwargs.get("log_file", defaults.LOG)
        self.log_file = self._prepare_log_file(requested_log)

        # work out which jar to use, then attempt to establish a connection
        requested_jar = kwargs.get("jar_path", defaults.DEFAULT_JAR)
        self.resolve_jar_path(requested_jar)
        self.establish_connection()
112
113
    def _check_server_version(self):
        """
        Checks server version to see if it meets the recommendations

        Requests "<self.address>/version" through post_json and compares the
        "version" entry of the reply, as a float, with the recommended version
        (__ps_rec__).  A warning is issued when the two differ or when the
        version cannot be determined; an info message is logged when they match.
        """
        # avoid circular imports by delaying this import
        from .__init__ import __ps_rec__
        try:
            service_address = "{}/version".format(self.address)
            server_version = post_json(service_address, None)["version"]
            meets_recommendation = float(__ps_rec__) == float(server_version)
        except Exception:
            # Exception rather than a bare except, so that KeyboardInterrupt
            # and SystemExit are not swallowed while the request is pending
            warnings.warn("Unable to determine server version.  Recommended version is {}".format(__ps_rec__))
            return

        if meets_recommendation:
            self.logger.info("Server version meets recommendations (v{})".format(__ps_rec__))
        else:
            warnings.warn("Recommended server version is {}, but server version is {}".format(__ps_rec__, server_version))
128
129
130
    def _prepare_log_file(self, lf):
131
        """
132
        Configure logger and return file path for logging
133
        """
134
        # log_file
135
        log_file = ProcessorsAPI.LOG if not lf else os.path.expanduser(lf)
136
        # configure logger
137
        self.logger.setLevel(logging.DEBUG)
138
        # create console handler and set level to info
139
        handler = logging.StreamHandler()
140
        handler.setLevel(logging.INFO)
141
        formatter = logging.Formatter("%(levelname)s - %(message)s")
142
        handler.setFormatter(formatter)
143
        self.logger.addHandler(handler)
144
        # create debug file handler and set level to debug
145
        handler = logging.FileHandler(log_file, "w")
146
        handler.setLevel(logging.DEBUG)
147
        formatter = logging.Formatter("%(levelname)s - %(message)s")
148
        handler.setFormatter(formatter)
149
        self.logger.addHandler(handler)
150
        return log_file
151
152
    def annotate(self, text):
        """
        Annotate `text` with the default processor (CoreNLP) and return the result.

        Kept on the API object itself for backwards compatibility.
        """
        processor = self.default
        return processor.annotate(text)
157
158
    def annotate_from_sentences(self, sentences):
        """
        Annotate already-segmented `sentences` (a list) with the default
        processor (CoreNLP) and return the result.
        """
        processor = self.default
        return processor.annotate_from_sentences(sentences)
163
164
    def establish_connection(self):
        """
        Attempt to connect to a server, starting one if none answers.

        A test annotation is sent first.  When it yields a truthy result the
        connection is considered established and the server version is
        checked.  Otherwise the method tries to start the server itself; any
        exception raised while doing so is reported through the logger as
        warnings rather than propagated.
        """
        if self.annotate("Blah"):
            self.logger.info("Connection with server established!")
            self._check_server_version()
            return

        try:
            # Attempt to start the server
            self._start_server()
        except Exception as e:
            # jar_path may be None or missing entirely; os.path.exists would
            # raise on None and mask the original error
            jar_path = getattr(self, "jar_path", None)
            if not jar_path or not os.path.exists(jar_path):
                self.logger.warning("\nprocessors-server.jar not found at {}.".format(jar_path))
            # Logger.warning replaces the deprecated Logger.warn alias
            self.logger.warning("Unable to start server. Please start the server manually with .start_server(jar_path=\"path/to/processors-server.jar\")")
            self.logger.warning("\n{}".format(e))
180
181
    def resolve_jar_path(self, jar_path):
        """
        Attempts to preferentially set value of self.jar_path

        Order of preference:
          1. `jar_path`, when it is given and exists on disk;
          2. the path stored in the PROCESSORS_SERVER environment variable;
          3. the default jar (ProcessorsAPI.DEFAULT_JAR), downloaded first
             when it is not present.

        A given `jar_path` that does not exist falls through to preferences 2
        and 3, so that self.jar_path is always assigned.  self.jar_path is set
        to None when the environment variable is defined but points at a path
        that does not exist.
        """
        # Preference 1: if a .jar is given, check to see if the path is valid
        if jar_path:
            jp = full_path(jar_path)
            if os.path.exists(jp):
                self.jar_path = jp
                return

        env_var = ProcessorsAPI.PROC_VAR
        # Preference 2: if a PROCESSORS_SERVER environment variable is defined, check its validity
        if env_var in os.environ:
            print("Using path given via $PROCESSORS_SERVER")
            jp = full_path(os.environ[env_var])
            if os.path.exists(jp):
                self.jar_path = jp
            else:
                self.jar_path = None
                print("WARNING: {0} path is invalid.  \nPlease verify this entry in your environment:\n\texport {0}=/path/to/processors-server.jar".format(env_var))
            return

        # Preference 3: attempt to use the processors-server.jar (download if not found)
        print("Using default")
        if not os.path.exists(ProcessorsAPI.DEFAULT_JAR):
            try:
                ProcessorsAPI._download_jar()
            except Exception as e:
                # a failed download must not prevent connecting to a server
                # that was started some other way
                print("WARNING: unable to download processors-server.jar ({})".format(e))
        self.jar_path = ProcessorsAPI.DEFAULT_JAR
209
210
    def start_server(self, jar_path, **kwargs):
        """
        Starts processors-server.jar

        Parameters
        ----------
        jar_path : str
            Path to the processors-server jar.
        **kwargs
            Optional overrides: hostname, port, jvm_mem and timeout.  Values
            that are not supplied keep their current setting.

        Raises
        ------
        Exception
            If no usable `jar_path` is provided, or if the server cannot be
            started (see _start_server).
        """
        # go through make_address so self.address follows any host/port override
        self.make_address(kwargs.get("hostname", self.hostname),
                          kwargs.get("port", self.port))
        self.jvm_mem = kwargs.get("jvm_mem", self.jvm_mem)

        # Only touch the timeout when one was supplied.  (Falling back to
        # self.jvm_mem here made float() fail on values such as "-Xmx3G".)
        timeout = kwargs.get("timeout")
        if timeout is not None:
            # NOTE(review): an explicit timeout has always been halved here,
            # unlike the value stored by __init__ -- confirm whether intended.
            self.timeout = int(float(timeout) / 2)

        jp = full_path(jar_path) if jar_path else None
        if not jp:
            raise Exception("Please provide jar_path=\"path/to/processors-server.jar\"")
        self.jar_path = jp
        self._start_server()
224
225
    def stop_server(self, port=None):
        """
        Sends a poison pill to the server and waits for shutdown response

        Parameters
        ----------
        port : int, optional
            Port of the server to shut down.  Defaults to self.port.

        Returns
        -------
        bool
            True when the shutdown request completed, False when it raised
            (for example because the server is already down).
        """
        port = port or self.port
        shutdown_address = "http://{}:{}/shutdown".format(self.hostname, port)
        # attempt shutdown
        try:
            response = requests.post(shutdown_address)
            if response:
                print(response.content.decode("utf-8"))
            return True
        # will fail if the server is already down
        except Exception as e:
            # best effort: record why the request failed instead of dropping it.
            # The module logger is used directly because this method is also
            # reached from __del__, where self.logger may not have been set.
            logging.getLogger(__name__).debug(
                "Shutdown request to {} failed: {}".format(shutdown_address, e))
        return False
242
243
    def _ensure_jar_path_exists(self):
244
        # check if jar exists
245
        if not os.path.exists(self.jar_path):
246
            raise Exception("jar not found at {}".format(self.jar_path))
247
248
    def _start_server(self, port=None):
        """
        "Private" method called by start_server()

        Launches the server as a child process, sending its stdout and stderr
        to self.log_file, then polls with a test annotation every
        self.wait_time seconds until the server answers or self.timeout
        seconds' worth of attempts have been made.

        Parameters
        ----------
        port : int, optional
            Port to start the server on.  Defaults to self.port.

        Returns
        -------
        bool
            True once the server answers.

        Raises
        ------
        Exception
            If the jar is missing or the server never answers.
        """
        # does the jar exist?
        self._ensure_jar_path_exists()

        if port:
            # keep self.address in step with the new port
            self.make_address(self.hostname, port)

        # build the command
        cmd = self._start_command.format(mem=self.jvm_mem, jp=self.jar_path, port=self.port, host=self.hostname)

        # One handle shared by stdout and stderr: two independent 'wb' handles
        # on the same file overwrite each other's output.  Append mode keeps
        # whatever the logger already wrote.  The child keeps its own copy of
        # the descriptor, so ours can be closed as soon as Popen returns.
        with open(self.log_file, 'ab') as server_log:
            self._process = sp.Popen(shlex.split(cmd),
                                     shell=False,
                                     stdout=server_log,
                                     stderr=sp.STDOUT,
                                     universal_newlines=True)

        self.logger.info("Starting processors-server ({}) ...".format(cmd))
        print("\nWaiting for server...")

        progressbar_length = int(self.timeout/self.wait_time)
        for i in range(progressbar_length):
            # any exception raised by annotate() propagates to the caller
            if self.annotate("blah"):
                print("\n\nConnection with processors-server established ({})".format(self.address))
                return True
            sys.stdout.write("\r[{:{}}]".format('='*i, progressbar_length))
            # no newline is written, so flush to make the bar visible
            sys.stdout.flush()
            time.sleep(self.wait_time)

        # if the server still hasn't started, raise an Exception
        raise Exception("Couldn't connect to processors-server. Is the port in use?")
283
284
    def make_address(self, hostname, port):
        """
        Store `hostname` and `port` and rebuild self.address from them
        as "http://<hostname>:<port>".
        """
        self.hostname, self.port = hostname, port
        self.address = "http://{}:{}".format(hostname, port)
291
292
    @staticmethod
    def _download_jar(jar_url="http://www.cs.arizona.edu/~hahnpowell/processors-server/current/processors-server.jar"):
        """
        Download processors-server.jar to ProcessorsAPI.DEFAULT_JAR.

        Parameters
        ----------
        jar_url : str
            Location to fetch the jar from.

        Progress is written to stdout while the download runs.
        """
        ppjar = ProcessorsAPI.DEFAULT_JAR

        def dlProgress(count, blockSize, totalSize):
            received = count * blockSize
            if totalSize > 0:
                # the final block usually overshoots the total, so cap at 100
                percent = min(100, int(received * 100 / totalSize))
                sys.stdout.write("\r{}% complete".format(percent))
            else:
                # the reporthook receives a non-positive total when the size
                # is unknown; a percentage cannot be computed then
                sys.stdout.write("\r{} bytes downloaded".format(received))
            sys.stdout.flush()

        print("Downloading {} from {} ...".format(ppjar, jar_url))
        urlretrieve(jar_url, ppjar, reporthook=dlProgress)
        print("\nDownload Complete! {}".format(ppjar))
305
306
307
    def _get_path(self, p):
308
        """
309
        Expand a user-specified path.  Supports "~" shortcut.
310
        """
311
        return os.path.abspath(os.path.normpath(os.path.expanduser(p)))
312
313
    def __del__(self):
314
        """
315
        Stop server unless otherwise specified
316
        """
317
        if not self.keep_alive:
318
            try:
319
                self.stop_server()
320
                # close our file object
321
                #self.DEVNULL.close()
322
                print("Successfully shut down processors-server!")
323
            except Exception as e:
324
                self.logger.debug(e)
325
                print("Couldn't kill processors-server.  Was server started externally?")
326