| Total Complexity | 3 |
| Total Lines | 19 |
| Duplicated Lines | 0 % |
| Changes | 2 | ||
| Bugs | 0 | Features | 0 |
| 1 | """Backend that returns most similar subjects based on similarity in sparse |
||
class TFIDFBackend(backend.AnnifBackend):
    """Backend that builds a gensim dictionary and TF-IDF model from subjects.

    ``load_subjects`` trains and persists the model files; ``analyze`` is not
    implemented yet and always returns an empty list.
    """

    name = "tfidf"

    def _atomic_save(self, obj, dirname, filename):
        """Save ``obj`` as ``dirname/filename`` by way of a temporary file.

        The object is written to a uniquely named temporary file inside
        ``dirname`` and then moved over the final path, so the final path is
        only ever replaced by a completely written file.

        Args:
            obj: any object exposing a ``save(path)`` method.
            dirname: directory that receives the file.
            filename: final file name; also used as the temp-file prefix.

        Raises:
            Whatever ``obj.save`` or the final move raises. The temporary
            file is removed before the exception propagates.
        """
        tempfd, tempfilename = tempfile.mkstemp(prefix=filename, dir=dirname)
        # mkstemp hands back an open descriptor; obj.save() is given the path
        # instead, so close the descriptor right away to avoid leaking it.
        os.close(tempfd)
        try:
            # NOTE(review): if obj.save() writes auxiliary files next to the
            # given path, those are not moved by the replace below - confirm.
            obj.save(tempfilename)
            # os.replace overwrites an existing target on every platform,
            # whereas os.rename fails on Windows when the target exists.
            os.replace(tempfilename, os.path.join(dirname, filename))
        except BaseException:
            # Best-effort cleanup so failed saves do not litter the data
            # directory with stale temporary files; the original error is
            # what the caller needs to see, so cleanup errors are ignored.
            try:
                os.remove(tempfilename)
            except OSError:
                pass
            raise

    def load_subjects(self, subjects, analyzer):
        """Build and persist the dictionary and TF-IDF model for ``subjects``.

        Saves two files into the backend's data directory: ``dictionary``
        (a ``gensim.corpora.Dictionary``) and ``tfidf`` (a
        ``gensim.models.TfidfModel``).

        Args:
            subjects: object whose ``tokens(analyzer)`` yields the corpus.
            analyzer: analyzer handed through to ``subjects.tokens``.
        """
        # NOTE(review): the corpus is consumed twice (dictionary building and
        # vectorisation), so it presumably must be re-iterable - confirm.
        corpus = subjects.tokens(analyzer)
        dictionary = gensim.corpora.Dictionary(corpus)
        self._atomic_save(dictionary, self._get_datadir(), 'dictionary')
        veccorpus = VectorCorpus(corpus, dictionary)
        tfidf = gensim.models.TfidfModel(veccorpus)
        self._atomic_save(tfidf, self._get_datadir(), 'tfidf')

    def analyze(self, text):
        """Return subject suggestions for ``text``.

        Not implemented yet: always returns an empty list.
        """
        return []  # TODO
||
| 44 |