| 1 |  |  | """Common functionality for backends.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3 |  |  | import abc | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4 |  |  | import os.path | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5 |  |  | from datetime import datetime, timezone | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6 |  |  | from glob import glob | 
            
                                                                                                            
                            
            
                                    
            
            
                | 7 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 8 |  |  | from annif import logger | 
            
                                                                                                            
                            
            
                                    
            
            
                | 9 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 10 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 11 |  |  | class AnnifBackend(metaclass=abc.ABCMeta): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 12 |  |  |     """Base class for Annif backends that perform analysis. The | 
            
                                                                                                            
                            
            
                                    
            
            
                | 13 |  |  |     non-implemented methods should be overridden in subclasses.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 14 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 15 |  |  |     name = None | 
            
                                                                                                            
                            
            
                                    
            
            
                | 16 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 17 |  |  |     DEFAULT_PARAMETERS = {"limit": 100} | 
            
                                                                                                            
                            
            
                                    
            
            
                | 18 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 19 |  |  |     def __init__(self, backend_id, config_params, project): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 20 |  |  |         """Initialize backend with specific parameters. The | 
            
                                                                                                            
                            
            
                                    
            
            
                | 21 |  |  |         parameters are a dict. Keys and values depend on the specific | 
            
                                                                                                            
                            
            
                                    
            
            
                | 22 |  |  |         backend type.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 23 |  |  |         self.backend_id = backend_id | 
            
                                                                                                            
                            
            
                                    
            
            
                | 24 |  |  |         self.config_params = config_params | 
            
                                                                                                            
                            
            
                                    
            
            
                | 25 |  |  |         self.project = project | 
            
                                                                                                            
                            
            
                                    
            
            
                | 26 |  |  |         self.datadir = project.datadir | 
            
                                                                                                            
                                                                
            
                                    
            
            
                | 27 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 28 |  |  |     def default_params(self): | 
            
                                                                        
                            
            
                                    
            
            
                | 29 |  |  |         return self.DEFAULT_PARAMETERS | 
            
                                                                                                            
                            
            
                                    
            
            
                | 30 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 31 |  |  |     @property | 
            
                                                                                                            
                            
            
                                    
            
            
                | 32 |  |  |     def params(self): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 33 |  |  |         params = {} | 
            
                                                                                                            
                            
            
                                    
            
            
                | 34 |  |  |         params.update(self.default_params()) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 35 |  |  |         params.update(self.config_params) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 36 |  |  |         return params | 
            
                                                                                                            
                            
            
                                    
            
            
                | 37 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 38 |  |  |     @property | 
            
                                                                                                            
                            
            
                                    
            
            
                | 39 |  |  |     def is_trained(self): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 40 |  |  |         return bool(glob(os.path.join(self.datadir, "*"))) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 41 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 42 |  |  |     @property | 
            
                                                                                                            
                            
            
                                    
            
            
                | 43 |  |  |     def modification_time(self): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 44 |  |  |         mtimes = [ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 45 |  |  |             datetime.utcfromtimestamp(os.path.getmtime(p)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 46 |  |  |             for p in glob(os.path.join(self.datadir, "*")) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 47 |  |  |         ] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 48 |  |  |         most_recent = max(mtimes, default=None) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 49 |  |  |         if most_recent is None: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 50 |  |  |             return None | 
            
                                                                                                            
                            
            
                                    
            
            
                | 51 |  |  |         return most_recent.replace(tzinfo=timezone.utc) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 52 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 53 |  |  |     def _get_backend_params(self, params): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 54 |  |  |         backend_params = dict(self.params) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 55 |  |  |         if params is not None: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 56 |  |  |             backend_params.update(params) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 57 |  |  |         return backend_params | 
            
                                                                                                            
                            
            
                                    
            
            
                | 58 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 59 |  |  |     def _train(self, corpus, params, jobs=0): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 60 |  |  |         """This method can be overridden by backends. It implements | 
            
                                                                                                            
                            
            
                                    
            
            
                | 61 |  |  |         the train functionality, with pre-processed parameters.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 62 |  |  |         pass  # default is to do nothing, subclasses may override | 
            
                                                                                                            
                            
            
                                    
            
            
                | 63 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 64 |  |  |     def train(self, corpus, params=None, jobs=0): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 65 |  |  |         """Train the model on the given document or subject corpus.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 66 |  |  |         beparams = self._get_backend_params(params) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 67 |  |  |         return self._train(corpus, params=beparams, jobs=jobs) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 68 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 69 |  |  |     def initialize(self, parallel=False): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 70 |  |  |         """This method can be overridden by backends. It should cause the | 
            
                                                                                                            
                            
            
                                    
            
            
                | 71 |  |  |         backend to pre-load all data it needs during operation. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 72 |  |  |         If parallel is True, the backend should expect to be used for | 
            
                                                                                                            
                            
            
                                    
            
            
                | 73 |  |  |         parallel operation.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 74 |  |  |         pass | 
            
                                                                                                            
                            
            
                                    
            
            
                | 75 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 76 |  |  |     @abc.abstractmethod | 
            
                                                                                                            
                            
            
                                    
            
            
                | 77 |  |  |     def _suggest(self, text, params): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 78 |  |  |         """This method should implemented by backends. It implements | 
            
                                                                                                            
                            
            
                                    
            
            
                | 79 |  |  |         the suggest functionality, with pre-processed parameters.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 80 |  |  |         pass  # pragma: no cover | 
            
                                                                                                            
                            
            
                                    
            
            
                | 81 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 82 |  |  |     def _suggest_batch(self, texts, params): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 83 |  |  |         """This method can be implemented by backends to use batching of documents in | 
            
                                                                                                            
                            
            
                                    
            
            
                | 84 |  |  |         their operations. This default implementation uses the regular suggest | 
            
                                                                                                            
                            
            
                                    
            
            
                | 85 |  |  |         functionality.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 86 |  |  |         return [self._suggest(text, params) for text in texts] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 87 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 88 |  |  |     def suggest(self, text, params=None): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 89 |  |  |         """Suggest subjects for the input text and return a list of subjects | 
            
                                                                                                            
                            
            
                                    
            
            
                | 90 |  |  |         represented as a list of SubjectSuggestion objects.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 91 |  |  |         beparams = self._get_backend_params(params) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 92 |  |  |         self.initialize() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 93 |  |  |         return self._suggest(text, params=beparams) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 94 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 95 |  |  |     def suggest_batch(self, texts, params=None): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 96 |  |  |         """Suggest subjects for the input documents and return a list of subject sets | 
            
                                                                                                            
                            
            
                                    
            
            
                | 97 |  |  |         represented as a list of SubjectSuggestion objects.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 98 |  |  |         beparams = self._get_backend_params(params) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 99 |  |  |         self.initialize() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 100 |  |  |         return self._suggest_batch(texts, params=beparams) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 101 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 102 |  |  |     def debug(self, message): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 103 |  |  |         """Log a debug message from this backend""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 104 |  |  |         logger.debug("Backend {}: {}".format(self.backend_id, message)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 105 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 106 |  |  |     def info(self, message): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 107 |  |  |         """Log an info message from this backend""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 108 |  |  |         logger.info("Backend {}: {}".format(self.backend_id, message)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 109 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                def warning(self, message):
                    """Emit a warning-level log entry prefixed with this backend's id"""
                    prefixed = "Backend {}: {}".format(self.backend_id, message)
                    logger.warning(prefixed)
            
                                                                                                            
                            
            
                                    
            
            
                | 113 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 114 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                class AnnifLearningBackend(AnnifBackend):
                    """Base class for Annif backends that support online learning"""

                    @abc.abstractmethod
                    def _learn(self, corpus, params):
                        """Backend-specific learning step; must be implemented by
                        subclasses. It is invoked by learn() with the corpus and the
                        pre-processed parameters."""
                        pass  # pragma: no cover

                    def learn(self, corpus, params=None):
                        """Further train the model on the given document or subject corpus.

                        The given params are first passed through _get_backend_params();
                        the result is handed to _learn(), whose return value is returned."""
                        resolved_params = self._get_backend_params(params)
                        return self._learn(corpus, params=resolved_params)
            
                                                        
            
                                    
            
            
                | 128 |  |  |  |