Code Duplication    Length = 14-14 lines in 2 locations

annif/backend/omikuji.py 1 location

@@ 104-117 (lines=14) @@
101
        self._model = omikuji.Model.train_on_data(train_path, hyper_param, jobs or None)
102
        annif.util.atomic_save_folder(self._model, model_path)
103
104
    def _train(self, corpus, params, jobs=0):
105
        if corpus != "cached":
106
            if corpus.is_empty():
107
                raise NotSupportedException(
108
                    "Cannot train omikuji project with no documents"
109
                )
110
            input = (doc.text for doc in corpus.documents)
111
            vecparams = {
112
                "min_df": int(params["min_df"]),
113
                "tokenizer": self.project.analyzer.tokenize_words,
114
                "ngram_range": (1, int(params["ngram"])),
115
            }
116
            veccorpus = self.create_vectorizer(input, vecparams)
117
            self._create_train_file(veccorpus, corpus)
118
        else:
119
            self.info("Reusing cached training data from previous run.")
120
        self._create_model(params, jobs)

annif/backend/xtransformer.py 1 location

@@ 204-217 (lines=14) @@
201
        )
202
        atomic_save_folder(self._model, model_path)
203
204
    def _train(self, corpus, params, jobs=0):
205
        if corpus == "cached":
206
            self.info("Reusing cached training data from previous run.")
207
        else:
208
            if corpus.is_empty():
209
                raise NotSupportedException("Cannot t project with no documents")
210
            input = (doc.text for doc in corpus.documents)
211
            vecparams = {
212
                "min_df": int(params["min_df"]),
213
                "tokenizer": self.project.analyzer.tokenize_words,
214
                "ngram_range": (1, int(params["ngram"])),
215
            }
216
            veccorpus = self.create_vectorizer(input, vecparams)
217
            self._create_train_files(veccorpus, corpus)
218
        self._create_model(params, jobs)
219
220
    def _suggest(self, text, params):