Code Duplication    Length = 14-14 lines in 2 locations

annif/backend/omikuji.py 1 location

@@ 103-116 (lines=14) @@
100             model_path,
101             None)
102
103     def _train(self, corpus, params, jobs=0):
104         if corpus != 'cached':
105             if corpus.is_empty():
106                 raise NotSupportedException(
107                     'Cannot train omikuji project with no documents')
108             input = (doc.text for doc in corpus.documents)
109             vecparams = {'min_df': int(params['min_df']),
110                          'tokenizer': self.project.analyzer.tokenize_words,
111                          'ngram_range': (1, int(params['ngram']))}
112             veccorpus = self.create_vectorizer(input, vecparams)
113             self._create_train_file(veccorpus, corpus)
114         else:
115             self.info("Reusing cached training data from previous run.")
116         self._create_model(params, jobs)
117
118     def _suggest(self, text, params):
119         self.debug('Suggesting subjects for text "{}..." (len={})'.format(

annif/backend/xtransformer.py 1 location

@@ 208-221 (lines=14) @@
205             )
206         atomic_save(self._model, model_path, None)
207
208     def _train(self, corpus, params, jobs=0):
209         if corpus == 'cached':
210             self.info("Reusing cached training data from previous run.")
211         else:
212             if corpus.is_empty():
213                 raise NotSupportedException(
214                     'Cannot train xtransformer project with no documents')
215             input = (doc.text for doc in corpus.documents)
216             vecparams = {'min_df': int(params['min_df']),
217                          'tokenizer': self.project.analyzer.tokenize_words,
218                          'ngram_range': (1, int(params['ngram']))}
219             veccorpus = self.create_vectorizer(input, vecparams)
220             self._create_train_files(veccorpus, corpus)
221         self._create_model(params, jobs)
222
223     def _suggest(self, text, params):
224         text = ' '.join(text.split())
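One way to resolve this duplication would be to hoist the shared vectorizer setup into a common helper that both backends call from their _train() methods. The sketch below is illustrative only: the mixin name, the helper name _prepare_train_data, and the import path for NotSupportedException are assumptions, not existing Annif code.

    # Hypothetical refactoring sketch; names and placement are assumptions.
    from annif.exception import NotSupportedException

    class VectorizerTrainMixin:
        """Shared logic for the duplicated _train() bodies shown above."""

        def _prepare_train_data(self, corpus, params, create_train_files):
            """Vectorize the corpus unless cached training data can be reused."""
            if corpus == 'cached':
                self.info("Reusing cached training data from previous run.")
                return
            if corpus.is_empty():
                raise NotSupportedException(
                    'Cannot train project with no documents')
            input = (doc.text for doc in corpus.documents)
            vecparams = {'min_df': int(params['min_df']),
                         'tokenizer': self.project.analyzer.tokenize_words,
                         'ngram_range': (1, int(params['ngram']))}
            veccorpus = self.create_vectorizer(input, vecparams)
            # Each backend passes its own training-file writer as the callback
            # (_create_train_file in omikuji, _create_train_files in xtransformer).
            create_train_files(veccorpus, corpus)

        # Each backend's _train() would then shrink to:
        #     def _train(self, corpus, params, jobs=0):
        #         self._prepare_train_data(corpus, params, self._create_train_file)
        #         self._create_model(params, jobs)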