Code Duplication    Length = 14-14 lines in 2 locations

annif/backend/xtransformer.py 1 location

@@ 209-222 (lines=14) @@
206
            )
207
        atomic_save_folder(self._model, model_path)
208
209
    def _train(self, corpus, params, jobs=0):
        """Prepare training data (unless cached) and build the model.

        Parameters:
            corpus: either the string ``'cached'`` or a corpus object that
                provides ``is_empty()`` and a ``documents`` iterable whose
                items have a ``text`` attribute.
            params: mapping holding at least ``'min_df'`` and ``'ngram'``;
                both values are converted with ``int()``.
            jobs: forwarded unchanged to ``_create_model``.

        Raises:
            NotSupportedException: if a non-cached corpus is empty.
        """
        if corpus == 'cached':
            # Skip vectorization entirely and go straight to model creation.
            self.info("Reusing cached training data from previous run.")
        else:
            if corpus.is_empty():
                raise NotSupportedException(
                    'Cannot train xtransformer project with no documents')
            # Generator expression: texts are handed to the vectorizer
            # lazily. Named `texts` to avoid shadowing the builtin `input`.
            texts = (doc.text for doc in corpus.documents)
            vecparams = {'min_df': int(params['min_df']),
                         'tokenizer': self.project.analyzer.tokenize_words,
                         'ngram_range': (1, int(params['ngram']))}
            veccorpus = self.create_vectorizer(texts, vecparams)
            self._create_train_files(veccorpus, corpus)
        self._create_model(params, jobs)
223
224
    def _suggest(self, text, params):
225
        text = ' '.join(text.split())

annif/backend/omikuji.py 1 location

@@ 102-115 (lines=14) @@
99
            self._model,
100
            model_path)
101
102
    def _train(self, corpus, params, jobs=0):
        """Build the model, vectorizing the corpus first unless it is cached.

        ``corpus`` is either the string ``'cached'`` or a corpus object with
        ``is_empty()`` and ``documents``. ``params`` must contain
        ``'min_df'`` and ``'ngram'`` (both passed through ``int()``).
        ``jobs`` is handed on to ``_create_model`` as-is.

        Raises NotSupportedException when a non-cached corpus is empty.
        """
        if corpus == 'cached':
            self.info("Reusing cached training data from previous run.")
        else:
            if corpus.is_empty():
                raise NotSupportedException(
                    'Cannot train omikuji project with no documents')
            # Lazily yield each document's text for the vectorizer.
            doc_texts = (document.text for document in corpus.documents)
            vectorizer_options = dict(
                min_df=int(params['min_df']),
                tokenizer=self.project.analyzer.tokenize_words,
                ngram_range=(1, int(params['ngram'])),
            )
            vectorized = self.create_vectorizer(doc_texts, vectorizer_options)
            self._create_train_file(vectorized, corpus)
        self._create_model(params, jobs)
116
117
    def _suggest(self, text, params):
118
        self.debug('Suggesting subjects for text "{}..." (len={})'.format(