| @@ 208-227 (lines=20) @@ | ||
| 205 | ) |
|
| 206 | atomic_save_folder(self._model, model_path) |
|
| 207 | ||
| 208 | def _train( |
|
| 209 | self, |
|
| 210 | corpus: DocumentCorpus, |
|
| 211 | params: dict[str, Any], |
|
| 212 | jobs: int = 0, |
|
| 213 | ) -> None: |
|
| 214 | if corpus == "cached": |
|
| 215 | self.info("Reusing cached training data from previous run.") |
|
| 216 | else: |
|
| 217 | if corpus.is_empty(): |
|
| 218 | raise NotSupportedException("Cannot train project with no documents") |
|
| 219 | input = (doc.text for doc in corpus.documents) |
|
| 220 | vecparams = { |
|
| 221 | "min_df": int(params["min_df"]), |
|
| 222 | "tokenizer": self.project.analyzer.tokenize_words, |
|
| 223 | "ngram_range": (1, int(params["ngram"])), |
|
| 224 | } |
|
| 225 | veccorpus = self.create_vectorizer(input, vecparams) |
|
| 226 | self._create_train_files(veccorpus, corpus) |
|
| 227 | self._create_model(params, jobs) |
|
| 228 | ||
| 229 | def _suggest_batch( |
|
| 230 | self, texts: list[str], params: dict[str, Any] |
|
| @@ 107-128 (lines=22) @@ | ||
| 104 | self._model = omikuji.Model.train_on_data(train_path, hyper_param, jobs or None) |
|
| 105 | annif.util.atomic_save_folder(self._model, model_path) |
|
| 106 | ||
| 107 | def _train( |
|
| 108 | self, |
|
| 109 | corpus: DocumentCorpus, |
|
| 110 | params: dict[str, Any], |
|
| 111 | jobs: int = 0, |
|
| 112 | ) -> None: |
|
| 113 | if corpus != "cached": |
|
| 114 | if corpus.is_empty(): |
|
| 115 | raise NotSupportedException( |
|
| 116 | "Cannot train omikuji project with no documents" |
|
| 117 | ) |
|
| 118 | input = (doc.text for doc in corpus.documents) |
|
| 119 | vecparams = { |
|
| 120 | "min_df": int(params["min_df"]), |
|
| 121 | "tokenizer": self.project.analyzer.tokenize_words, |
|
| 122 | "ngram_range": (1, int(params["ngram"])), |
|
| 123 | } |
|
| 124 | veccorpus = self.create_vectorizer(input, vecparams) |
|
| 125 | self._create_train_file(veccorpus, corpus) |
|
| 126 | else: |
|
| 127 | self.info("Reusing cached training data from previous run.") |
|
| 128 | self._create_model(params, jobs) |
|
| 129 | ||
| 130 | def _suggest_batch( |
|
| 131 | self, texts: list[str], params: dict[str, Any] |
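
Both locations build an identical vectorizer configuration (`min_df`, `tokenizer`, `ngram_range`) before writing their training data, which is the duplicated core flagged above. As a sketch only, under the assumption that the concrete backend already provides `create_vectorizer()` and `self.project.analyzer` as both fragments do, the shared setup could live in one helper; the `VectorizerTrainMixin` name and `_vectorize_corpus` method below are hypothetical and not part of the codebase.

```python
from typing import Any


class VectorizerTrainMixin:
    """Hypothetical helper (not in the codebase) factoring out the vectorizer
    setup duplicated by the two _train implementations quoted above."""

    def _vectorize_corpus(self, corpus, params: dict[str, Any]):
        # The same parameters both fragments pass to create_vectorizer().
        texts = (doc.text for doc in corpus.documents)
        vecparams = {
            "min_df": int(params["min_df"]),
            "tokenizer": self.project.analyzer.tokenize_words,
            "ngram_range": (1, int(params["ngram"])),
        }
        # create_vectorizer() and self.project are assumed to come from the
        # backend base class that both fragments already rely on.
        return self.create_vectorizer(texts, vecparams)
```

Each backend's `_train` would then keep only its own behaviour (cached-corpus handling, single vs. multiple train files, model creation) and call the shared helper for the vectorizer step.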
|