1
|
|
|
from abc import ABCMeta, abstractmethod |
|
|
|
|
2
|
|
|
|
3
|
|
|
from six import with_metaclass |
4
|
|
|
import jsonpickle |
5
|
|
|
|
6
|
|
|
from ._registry import registered_outputs |
7
|
|
|
|
8
|
|
|
|
9
|
|
|
class OutputInterface(with_metaclass(ABCMeta)):
    """Abstract base class for output backends.

    An instance carries four slots (``corpus``, ``tokenized_corpora``,
    ``vectorized_corpora`` and ``modeled_corpora``), all initialised to
    ``None``.  Concrete subclasses must implement ``get_filtered_data``;
    ``save``, ``synchronize`` and ``close`` have default implementations.
    """

    def __init__(self, *args, **kwargs):
        """Initialise every corpus slot to ``None``.

        Any positional and keyword arguments are forwarded unchanged to the
        next ``__init__`` in the method resolution order.
        """
        super(OutputInterface, self).__init__(*args, **kwargs)

        # should be an iterable with each member having (id, text)
        self.corpus = None
        # should be a dictionary-like structure, with string ids for tokenizer used and parameters
        # passed and dictionaries mapping doc id to list of tokens
        self.tokenized_corpora = None
        # should be a dictionary-like structure, with string ids for vectorizer used and parameters
        # passed and dictionaries mapping doc id to list of tokens
        # NOTE(review): "list of tokens" looks copied from tokenized_corpora; presumably the
        # values here are vectors -- confirm against the concrete outputs.
        self.vectorized_corpora = None
        # should be a dictionary-like structure, with string ids for model used and parameters passed
        # and dictionaries mapping doc id to list of tokens
        # NOTE(review): "list of tokens" looks copied from tokenized_corpora; presumably the
        # values here are model results -- confirm against the concrete outputs.
        self.modeled_corpora = None

    def save(self, filename, saved_data=None):
        """Persist this object to disk somehow.

        You can save your data in any number of files in any format, but at a minimum, you need one json file that
        describes enough to bootstrap the loading process. Namely, you must have a key called 'class' so that upon
        loading the output, the correct class can be instantiated and used to load any other data. You don't have
        to implement anything for saved_data, but it is stored as a key next to 'class'.

        Args:
            filename: Path of the bootstrap file. It is opened in mode "w",
                so an existing file is overwritten.
            saved_data: Optional extra data stored under the 'saved_data'
                key; ``None`` when not given.
        """
        bootstrap = {"class": self.__class__.__name__, "saved_data": saved_data}
        with open(filename, "w") as f:
            # jsonpickle.encode returns a string; the file handle must not be
            # passed to it (the second positional parameter is `unpicklable`).
            f.write(jsonpickle.encode(bootstrap))

    def synchronize(self, max_wait, field):
        """By default, operations are synchronous and no additional wait is
        necessary.  Data sources that are asynchronous (ElasticSearch) may
        use this function to wait for "eventual consistency".

        The base implementation ignores both arguments and does nothing.
        """
        pass

    @abstractmethod
    def get_filtered_data(self, field_to_get, filter=""):
        """Abstract hook; concrete subclasses must override it.

        The base implementation only raises ``NotImplementedError``.
        Presumably an override returns the data of ``field_to_get``
        restricted by ``filter`` -- confirm against the concrete outputs.
        """
        raise NotImplementedError

    def close(self):
        """Do nothing by default; subclasses may override this method."""
        pass
48
|
|
|
|
49
|
|
|
|
50
|
|
|
def load_output(filename):
    """Recreate an output object from a bootstrap file.

    The file is decoded with jsonpickle and must contain the keys 'class'
    and 'saved_data', as written by ``OutputInterface.save``.  The 'class'
    value is looked up in ``registered_outputs`` and the resulting callable
    is invoked with the 'saved_data' mapping expanded as keyword arguments.

    Args:
        filename: Path of the bootstrap file to read.

    Returns:
        Whatever the registered callable returns for the saved data.

    Raises:
        KeyError: If 'class' or 'saved_data' is missing from the file, or the
            class name is not present in ``registered_outputs``.
    """
    with open(filename) as f:
        # NOTE(security): jsonpickle.decode can instantiate arbitrary Python
        # objects; only load files that come from a trusted source.
        output_details = jsonpickle.decode(f.read())

    saved_data = output_details["saved_data"]
    if saved_data is None:
        # save() stores None when no saved_data is given, and `**None` would
        # raise TypeError, so treat it as "no keyword arguments".
        saved_data = {}
    return registered_outputs[output_details['class']](**saved_data)
54
|
|
|
|
The coding style of this project requires that you add a docstring to this code element. Below is an example for methods:
If you would like to know more about docstrings, we recommend reading PEP 257: Docstring Conventions.