Passed
Pull Request — master (#47)
by Frank
01:34
created

sutime.sutime.SUTime.__init__()   B

Complexity

Conditions 7

Size

Total Lines 62
Code Lines 31

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 31
dl 0
loc 62
rs 7.736
c 0
b 0
f 0
cc 7
nop 7

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

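Commonly applied refactorings include Extract Method: move a cohesive group of statements out of the long method into a new, well-named method.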

1
# -*- coding: utf-8 -*-
2
"""A Python wrapper for Stanford CoreNLP's SUTime."""
3
4
import glob
5
import importlib
6
import json
7
import logging
8
import os
9
import socket
10
import sys
11
import threading
12
from pathlib import Path
13
from typing import Dict, List, Optional
14
15
import jpype  # pyre-ignore[21]
16
17
# Default timeout (in seconds) installed via `socket.setdefaulttimeout`, i.e.
# it applies to every socket object created after this module is imported.
SOCKET_DEFAULT_TIMEOUT = 15
# Backward-compatible alias: the constant was originally exposed under this
# misspelled name, so it is kept for code that already imports it.
SOCKED_DEFAULT_TIMEOUT = SOCKET_DEFAULT_TIMEOUT
socket.setdefaulttimeout(SOCKET_DEFAULT_TIMEOUT)
19
20
21
class SUTime(object):
    """Python wrapper for SUTime (CoreNLP) by Stanford.

    On construction the wrapper validates the requested language, optionally
    starts the JVM through JPype with all jars found below `jars` on the
    class path, and instantiates the Java `SUTimeWrapper` class. Text is then
    annotated with `parse()`.
    """

    _sutime_python_jar = 'stanford-corenlp-sutime-python-1.4.0.jar'
    _sutime_java_class = 'edu.stanford.nlp.python.SUTimeWrapper'
    _corenlp_version = '4.0.0'

    # full name or ISO 639-1 code
    _languages = {
        'arabic': 'arabic',
        'ar': 'arabic',
        'chinese': 'chinese',
        'zh': 'chinese',
        'english': 'english',
        'british': 'british',
        'en': 'english',
        'french': 'french',
        'fr': 'french',
        'german': 'german',
        'de': 'german',
        'spanish': 'spanish',
        'es': 'spanish',
    }

    # https://github.com/stanfordnlp/CoreNLP/tree/master/src/edu/stanford/nlp/time/rules
    _supported_languages = {'british', 'english', 'spanish'}

    # Jar file names that must be present somewhere below `self.jars`. The
    # CoreNLP entries are derived from `_corenlp_version` so that the version
    # only has to be bumped in one place.
    _required_jars = {
        'stanford-corenlp-{0}-models.jar'.format(_corenlp_version),
        'stanford-corenlp-{0}.jar'.format(_corenlp_version),
        'gson-2.8.6.jar',
        'slf4j-simple-1.7.30.jar',
    }

    def __init__(
        self,
        jars: Optional[str] = None,
        jvm_started: Optional[bool] = False,
        mark_time_ranges: Optional[bool] = False,
        include_range: Optional[bool] = False,
        jvm_flags: Optional[List[str]] = None,
        language: Optional[str] = 'english',
    ):
        """Initialize `SUTime` wrapper.

        Args:
            jars (Optional[str]): Path to previously downloaded SUTime Java
                dependencies. Defaults to None, in which case the `jars`
                directory next to this module is used.
            jvm_started (Optional[bool]): Flag to indicate that JVM has been
                already started (with all Java dependencies loaded). Defaults
                to False.
            mark_time_ranges (Optional[bool]): SUTime flag for
                sutime.markTimeRanges. Defaults to False.
                "Whether or not to recognize time ranges such as 'July to
                August'"
            include_range (Optional[bool]): SUTime flag for
                sutime.includeRange. Defaults to False.
                "Whether or not to add range info to the TIMEX3 object"
            jvm_flags (Optional[List[str]]): List of flags passed to JVM. For
                example, this may be used to specify the maximum heap size
                using '-Xmx'. Has no effect if `jvm_started` is set to True.
                Defaults to None.
            language (Optional[str]): Selected language. Currently supported
                are: english (/en), british, spanish (/es). Matching is
                case-insensitive. Defaults to `english`.

        Raises:
            RuntimeError: If the language is unknown, a required language
                model is missing, or required jars are missing.
            SystemExit: If the Java wrapper class cannot be loaded.
        """
        self.mark_time_ranges = mark_time_ranges
        self.include_range = include_range
        self._is_loaded = False
        self._lock = threading.Lock()
        module_root = Path(__file__).resolve().parent
        self.jars = Path(jars) if jars else module_root / 'jars'

        language_key = language.lower() if language else ''
        self._check_language_model_dependency(language_key)

        if not jvm_started:
            self._classpath = self._create_classpath()
            self._start_jvm(jvm_flags)

        # The check above guarantees that `language_key` is a known key. The
        # Java side is handed the normalized full name so that ISO codes and
        # mixed-case input behave exactly like the full language name.
        self._load_sutime(self._languages[language_key])

    def _load_sutime(self, language: str) -> None:
        """Instantiate the Java SUTime wrapper class for `language`.

        Args:
            language (str): Normalized (full) language name passed to the
                Java wrapper's constructor.

        Raises:
            SystemExit: If attaching to the JVM or creating the wrapper
                fails for any reason.
        """
        try:
            # When other threads are running, make sure the current thread
            # is attached to the JVM before calling into Java.
            if threading.active_count() > 1:
                if not jpype.isThreadAttachedToJVM():
                    jpype.attachThreadToJVM()
            # `with` releases the lock only if it was actually acquired; a
            # bare release() in a `finally` would raise for failures that
            # happen before the lock is taken and hide the real error.
            with self._lock:
                wrapper = jpype.JClass(self._sutime_java_class)
                self._sutime = wrapper(
                    self.mark_time_ranges, self.include_range, language,
                )
                self._is_loaded = True
        except Exception as exc:
            # Kept as SystemExit for backward compatibility with callers.
            sys.exit('Could not load JVM: {0}'.format(exc))

    def parse(
        self, input_str: str, reference_date: Optional[str] = '',
    ) -> List[Dict]:
        """Parse datetime information out of string input.

        It invokes the SUTimeWrapper.annotate() function in Java.

        Args:
            input_str (str): The input as string that has to be parsed.
            reference_date (Optional[str]): Optional reference date for
                SUTime. When empty, `annotate()` is called without it.
                Defaults to `''`.

        Returns:
            A list of dicts with the result from the `SUTimeWrapper.annotate()`
            call (its string form decoded as JSON).

        Raises:
            RuntimeError: An error occurs when CoreNLP is not loaded.
        """
        if not self._is_loaded:
            raise RuntimeError('Please load SUTime first!')

        if reference_date:
            annotation = self._sutime.annotate(input_str, reference_date)
        else:
            annotation = self._sutime.annotate(input_str)
        return json.loads(str(annotation))

    def _check_language_model_dependency(self, language: str) -> None:
        """Validate `language` and make sure its model jar is available.

        Args:
            language (str): Lower-cased language name or ISO 639-1 code.

        Raises:
            RuntimeError: If `language` is not a known key, or if it is a
                supported non-English language whose model jar is missing
                from `self.jars`.
        """
        if language not in self._languages:
            raise RuntimeError('Unsupported language: {0}'.format(language))
        normalized_language = self._languages[language]

        if normalized_language not in self._supported_languages:
            # Known language without SUTime rules: warn, but do not fail.
            logging.warning('{0}: {1}. {2}.'.format(
                normalized_language.capitalize(),
                'is not (yet) supported by SUTime',
                'Falling back to default model',
            ))
            return

        # English and British need no separate language model jar.
        if normalized_language in {'english', 'british'}:
            return

        language_model_file = (
            self.jars / 'stanford-corenlp-{0}-models-{1}.jar'.format(
                self._corenlp_version,
                normalized_language,
            ))

        # A plain existence test: unlike glob matching it also works when
        # the jar directory contains characters such as '[' or '*'.
        if not language_model_file.exists():
            raise RuntimeError(
                'Missing language model for {0}! Run {1} {2} {3}'.format(
                    normalized_language.capitalize(),
                    'mvn dependency:copy-dependencies',
                    '-DoutputDirectory=./sutime/jars -P',
                    normalized_language,
                ),
            )

    def _start_jvm(self, additional_flags: Optional[List[str]]) -> None:
        """Start the JVM with `self._classpath`, unless it already runs.

        Args:
            additional_flags (Optional[List[str]]): Extra JVM flags appended
                after the class path flag; may be None or empty.
        """
        flags = ['-Djava.class.path={0}'.format(self._classpath)]
        if additional_flags:
            flags.extend(additional_flags)
        logging.info('jpype.isJVMStarted(): %s', jpype.isJVMStarted())
        if not jpype.isJVMStarted():
            jpype.startJVM(jpype.getDefaultJVMPath(), *flags)

    def _create_classpath(self) -> str:
        """Build the Java class path from the bundled and downloaded jars.

        The bundled SUTime Python jar comes first, followed by every `.jar`
        file found (recursively) below `self.jars`.

        Returns:
            The jar paths joined with `os.pathsep`.

        Raises:
            RuntimeError: If any jar listed in `_required_jars` was not found
                below `self.jars`.
        """
        # `import importlib` alone does not guarantee that the `util`
        # submodule is loaded, so import it explicitly here.
        import importlib.util

        spec = importlib.util.find_spec('sutime')
        if spec is not None and spec.origin:
            package_dir = Path(spec.origin).parent
        else:
            # The package is not importable as `sutime` (e.g. vendored under
            # another name): fall back to the directory of this file.
            package_dir = Path(__file__).resolve().parent

        jars = [package_dir / 'jars' / self._sutime_python_jar]
        jar_file_names = set()
        for top, _, files in os.walk(self.jars):
            for file_name in files:
                if file_name.endswith('.jar'):
                    jars.append(Path(top, file_name))
                    jar_file_names.add(file_name)

        missing_jars = [
            jar for jar in self._required_jars if jar not in jar_file_names
        ]
        if missing_jars:
            logging.warning(missing_jars)
            raise RuntimeError(
                'Not all necessary Java dependencies have been downloaded!',
            )
        return os.pathsep.join(str(jar) for jar in jars)
206